"""Gradio demo: speaker diarization of uploaded audio via pyannote.audio."""

import os
import io
import tempfile

import gradio as gr
import numpy as np
import torch
import wavio
from pyannote.audio import Pipeline
from pyannote.audio import Audio
from pyannote.core import Segment

# NOTE(review): requires the 'api' env var to hold a HuggingFace token with
# access to pyannote/speaker-diarization-3.1 — confirm deployment config.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ['api'],
)


def process_audio(audio):
    """Run speaker diarization on audio uploaded through a gr.Audio component.

    Args:
        audio: tuple ``(sample_rate, data)`` as produced by ``gr.Audio`` in
            numpy mode — ``sample_rate`` is an int, ``data`` a numpy array
            (1-D mono or 2-D ``(samples, channels)``).

    Returns:
        A JSON-serializable list of segments, each a dict with ``"start"``,
        ``"end"`` (seconds) and ``"speaker"`` keys, suitable for ``gr.JSON``.
    """
    # BUG FIX: gr.Audio yields (sample_rate, data) — the original unpacked the
    # tuple the other way round, swapping sample rate and samples.
    sample_rate, audio_data = audio

    audio_data = np.asarray(audio_data)
    # Collapse stereo/multi-channel to mono; the original assumed 1-D input
    # and broke on (samples, channels) arrays.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    # Normalize to full-scale int16; guard against silent (all-zero) input,
    # which previously divided by zero.
    peak = np.max(np.abs(audio_data)) if audio_data.size else 0
    if peak > 0:
        audio_data = np.int16(audio_data / peak * 32767)
    else:
        audio_data = np.int16(audio_data)

    # Use a unique temporary file — the fixed "temp.wav" name raced between
    # concurrent requests — and always remove it, even if the pipeline fails.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        wavio.write(tmp.name, audio_data[:, np.newaxis], sample_rate, sampwidth=2)
        diarization = pipeline(tmp.name)
    finally:
        os.remove(tmp.name)

    # The pyannote Annotation object is not JSON-serializable; flatten it to
    # plain dicts so gr.JSON can render the result.
    return [
        {"start": turn.start, "end": turn.end, "speaker": speaker}
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]


with gr.Blocks() as demo:
    audio_input = gr.Audio(label="Upload Audio")
    process_button = gr.Button("Process")
    diarization_output = gr.JSON(label="Diarization Output")
    process_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=diarization_output,
    )

demo.launch()