File size: 3,099 Bytes
1cedc8d
8919734
 
03a5c9f
8919734
 
659de25
8919734
c81c952
1989564
8919734
1989564
659de25
8919734
 
5e6b18d
03a5c9f
 
 
 
8919734
c81c952
8919734
3d27f20
 
 
 
 
 
 
 
 
 
 
 
 
 
1989564
3d27f20
 
 
 
 
 
 
 
 
 
 
1989564
8919734
3d27f20
 
 
 
 
 
 
 
474e9f2
 
 
8919734
 
 
1989564
03a5c9f
 
 
 
 
 
 
 
 
8919734
 
5e6b18d
 
 
 
 
 
8919734
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import spaces
import gradio as gr
from gryannote_audio import AudioLabeling
from gryannote_rttm import RTTM
from pyannote.audio import Pipeline
import os
import torch

@spaces.GPU(duration=120)
def apply_pipeline(audio):
    """Run the pretrained speaker-diarization pipeline on ``audio``.

    The "pyannote/speaker-diarization-3.1" pipeline is loaded with the
    token read from the ``HF_TOKEN`` environment variable, moved to the
    CUDA device, and then called on ``audio``.

    Args:
        audio: Value received from the audio labeling component
            (presumably a file path, since that component is created with
            ``type="filepath"`` -- confirm against the component's docs).

    Returns:
        A 2-tuple ``((audio, annotations), annotations)``: the first item
        pairs the input audio with the pipeline output, the second item is
        the pipeline output alone.

    Raises:
        KeyError: If the ``HF_TOKEN`` environment variable is not set.
    """
    diarizer = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=os.environ["HF_TOKEN"],
    )
    diarizer.to(torch.device("cuda"))

    diarization = diarizer(audio)
    labeled_audio = (audio, diarization)

    return (labeled_audio, diarization)


def update_annotations(data):
    """Pass an edit payload on to the RTTM component and return its result.

    Args:
        data: Payload received from the audio labeling component's edit
            event; it is handed to ``rttm.on_edit`` unchanged.

    Returns:
        Whatever ``rttm.on_edit(data)`` returns, where ``rttm`` is the
        module-level ``RTTM`` component created in the Blocks layout.
    """
    refreshed = rttm.on_edit(data)
    return refreshed


# Build the demo UI: a header (logo + title), an "application" tab holding the
# audio labeling component, the "Run pipeline" button and the RTTM component,
# and a "poster" tab showing a static image.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():

            # Header row: logo on the left, title and tagline on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(
                        '<a href="https://github.com/clement-pages/gryannote"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/logo-gryannote.png?raw=true" alt="gryannote logo" width="140"/></a>',
                    )
                with gr.Column(scale=10):
                    gr.Markdown('<h1 style="font-size: 4em;">gryannote</h1>')
                    gr.Markdown()
                    gr.Markdown('<h2 style="font-size: 2em;">Make the audio labeling process easier and faster! </h2>')

            with gr.Tab("application"):
                # The help text is assembled by implicit string concatenation,
                # so every continuation piece starts with an explicit space to
                # keep sentences from running together when rendered.
                gr.Markdown(
                    "To use the component, start by loading or recording audio."
                    " Then apply the diarization pipeline (here [pyannote/speaker-diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1))"
                    " or double-click directly on the waveform to add an annotation. The annotations produced can be edited."
                    " You can also use keyboard shortcuts to speed things up! Click on the help button to see all the available shortcuts."
                    " Finally, annotations can be saved by clicking on the downloading button in the RTTM component."
                )
                # Empty Markdown components are used as vertical spacers.
                gr.Markdown()
                gr.Markdown()
                audio_labeling = AudioLabeling(
                    type="filepath",
                    interactive=True,
                )

                gr.Markdown()
                gr.Markdown()

                run_btn = gr.Button("Run pipeline")

                # Module-level name: update_annotations() refers to it.
                rttm = RTTM()

            with gr.Tab("poster"):
                gr.Markdown(
                    '<p align="center"><img src="https://github.com/clement-pages/gryannote/blob/main/docs/assets/poster-interspeech.jpg?raw=true" alt="gryannote poster" width=700em/></p>'
                )

    # Clicking the button runs apply_pipeline on the current audio value and
    # sends its two return values to the audio component and the RTTM
    # component respectively.
    run_btn.click(
        fn=apply_pipeline,
        inputs=audio_labeling,
        outputs=[audio_labeling, rttm],
    )

    # Editing annotations in the audio component refreshes the RTTM component.
    # NOTE(review): preprocess/postprocess are disabled, so the handler
    # presumably receives and returns raw component payloads -- confirm
    # against the Gradio event listener documentation.
    audio_labeling.edit(
        fn=update_annotations,
        inputs=audio_labeling,
        outputs=rttm,
        preprocess=False,
        postprocess=False,
    )

    # Uploading a file into the RTTM component calls the audio component's
    # load_annotations with both component values and updates the audio
    # component with the result.
    rttm.upload(
        fn=audio_labeling.load_annotations,
        inputs=[audio_labeling, rttm],
        outputs=audio_labeling,
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()