import gradio as gr

# Interface built from the "huggingface/pyannote/voice-activity-detection"
# identifier; loading happens once, at import time.
model = gr.Interface.load("huggingface/pyannote/voice-activity-detection")


def load_data(path, target_sr=16000):
    """Load the audio file at *path* as a mono waveform sampled at *target_sr*.

    Args:
        path: Location of the audio file to read.
        target_sr: Sampling rate, in Hz, of the returned waveform. Defaults
            to 16000 (16 kHz), the rate this script has always targeted.

    Returns:
        The waveform produced by ``librosa.load``: a one-dimensional array
        of samples at ``target_sr``.
    """
    # librosa was used here without ever being imported. It is imported
    # locally so that importing this module does not start depending on it.
    import librosa

    # Ask librosa for the target rate directly. The previous code loaded at
    # librosa's default rate and then resampled a second time, using
    # positional arguments that newer librosa releases reject. mono=True
    # (librosa's default) is spelled out because callers expect one channel.
    speech, _ = librosa.load(path, sr=target_sr, mono=True)
    return speech


def inference(path):
    """Load the recording at *path* through :func:`load_data`.

    NOTE(review): the loaded waveform is discarded and the function returns
    ``None``; ``model`` is not invoked here. Confirm whether a prediction was
    meant to be returned.
    """
    load_data(path)


# Gradio components and display text for the demo.
# NOTE(review): where these are consumed is not visible in this excerpt.
inputs = gr.inputs.Audio(label="Input Audio", type="filepath", source="microphone")
outputs = gr.outputs.Label(type="auto", label="Voice timestamps")
title = "Voice Activity Detection"
description = "Record or upload an audio file and detected human voices will be timestamped."

# NOTE(review): the source is cut off in the middle of the assignment below,
# so it is kept verbatim as a comment instead of being guessed at. Restore
# the complete string before anything relies on `article`.
# article = "