import functools

import gradio as gr
import numpy as np
import torch
from pyannote.audio import Pipeline
@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Load the pretrained diarization pipeline once and reuse it afterwards."""
    return Pipeline.from_pretrained("pyannote/speaker-diarization")


def _to_pipeline_input(audio):
    """Turn a UI audio value into an input for the diarization pipeline.

    Anything that is not a tuple (e.g. a file path) is passed through
    unchanged. A ``(sample_rate, samples)`` tuple is converted into a
    ``{"waveform": tensor, "sample_rate": int}`` mapping.
    """
    if not isinstance(audio, tuple):
        return audio

    # NOTE(review): assumes Gradio's default "audio" input delivers
    # (sample_rate, ndarray) with samples shaped (time,) or (time, channels),
    # and that pyannote accepts a float (channel, time) waveform -- confirm
    # against the installed library versions.
    sample_rate, samples = audio
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Scale integer PCM to floats in roughly [-1, 1].
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    samples = samples[np.newaxis, :] if samples.ndim == 1 else samples.T
    waveform = torch.from_numpy(np.ascontiguousarray(samples))
    return {"waveform": waveform, "sample_rate": int(sample_rate)}


def diarization(audio="audio.wav"):
    """Run speaker diarization on *audio* and describe each speaker turn.

    Args:
        audio: Path to an audio file, or a ``(sample_rate, samples)`` tuple.
            Defaults to ``"audio.wav"`` so that existing zero-argument calls
            keep processing the same file as before.

    Returns:
        One line per speaker turn of the form
        ``"<speaker> said something starting from <start> and ends on <end>"``,
        each terminated by a newline. If *audio* is ``None`` a short notice
        is returned instead of running the pipeline.
    """
    if audio is None:
        # Nothing to analyse; avoid handing None to the pipeline.
        return "No audio provided."

    output = _load_pipeline()(_to_pipeline_input(audio))
    return "".join(
        f"{speaker} said something starting from {turn.start} and ends on {turn.end}\n"
        for turn, _, speaker in output.itertracks(yield_label=True)
    )
# Wire the diarization function into a Gradio interface: a single audio
# input is handed to `diarization`, and its string result is shown as text.
app = gr.Interface(
    fn=diarization,
    inputs="audio",
    outputs="text",
)

# Launch the interface as soon as this module is executed.
app.launch()