"""Gradio demo: run pyannote speaker diarization on an audio file."""

from functools import lru_cache

import gradio as gr
from pyannote.audio import Pipeline

# Path used when ``diarization`` is called without an argument; this keeps
# the original zero-argument call working.
DEFAULT_AUDIO_PATH = "audio.wav"


@lru_cache(maxsize=1)
def _load_pipeline():
    """Load the pretrained diarization pipeline once and reuse it.

    Loading is deferred to the first request so importing this module stays
    cheap, and cached so later requests do not reload the model.
    """
    # NOTE(review): depending on the installed pyannote.audio version this
    # model may need a Hugging Face access token -- confirm for deployment.
    return Pipeline.from_pretrained("pyannote/speaker-diarization")


def diarization(audio=DEFAULT_AUDIO_PATH):
    """Describe who speaks when in an audio file.

    Args:
        audio: Path to the audio file to analyse. Gradio supplies this from
            the audio input component; it is ``None`` when the user submits
            without providing audio. Defaults to ``"audio.wav"``.

    Returns:
        One line per speaker turn, each terminated by a newline, giving the
        speaker label and the turn's start and end as reported by the
        pipeline. A short message is returned when no audio was provided.
    """
    if audio is None:
        return "No audio provided."

    output = _load_pipeline()(audio)
    # str.join avoids repeated string re-allocation for long recordings.
    return "".join(
        f"{speaker} said something starting from {turn.start} "
        f"and ends on {turn.end}\n"
        for turn, _, speaker in output.itertracks(yield_label=True)
    )


# type="filepath" makes Gradio hand the function a path on disk rather than
# its default (sample_rate, samples) tuple, matching what the pipeline call
# above is given.
app = gr.Interface(
    fn=diarization,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    app.launch()