# Hugging Face Space: speaker-diarization demo (pyannote + Gradio).
# NOTE: the original page header ("Spaces: Running on Zero") was web-scrape
# residue, not code; it is preserved here as a comment so the file parses.
# Standard library
import io
import os

# Third-party
import gradio as gr
import numpy as np
import torch
import wavio
from pyannote.audio import Audio, Pipeline
from pyannote.core import Segment

# Load the pretrained diarization pipeline once at startup.
# The Hugging Face access token is read from the 'api' environment variable
# (required because pyannote/speaker-diarization-3.1 is a gated model).
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ['api'])
def process_audio(audio):
    """Run speaker diarization on an uploaded audio clip.

    Args:
        audio: Gradio audio value, a ``(sample_rate, data)`` tuple where
            ``data`` is a numpy array (mono 1-D or stereo 2-D).

    Returns:
        The pyannote diarization result for the clip.
    """
    import tempfile  # local import: only needed when a request arrives

    # BUG FIX: gr.Audio yields (sample_rate, data) -- the original code
    # unpacked the tuple in the wrong order.
    sample_rate, audio_data = audio
    audio_data = np.asarray(audio_data)

    # Peak-normalize to 16-bit PCM; guard against an all-silent clip to
    # avoid a divide-by-zero.
    peak = np.max(np.abs(audio_data))
    if peak > 0:
        audio_data = np.int16(audio_data / peak * 32767)
    else:
        audio_data = np.zeros(audio_data.shape, dtype=np.int16)

    # wavio expects (frames, channels); only add a channel axis for mono
    # input -- the original unconditionally did [:, np.newaxis], which
    # produced an invalid 3-D array for stereo input.
    if audio_data.ndim == 1:
        audio_data = audio_data[:, np.newaxis]

    # Use a unique temp file so concurrent requests don't clobber each
    # other, and always clean up even if the pipeline raises.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        wavio.write(tmp.name, audio_data, sample_rate, sampwidth=2)
        diarization = pipeline(tmp.name)
    finally:
        os.remove(tmp.name)

    return diarization
# Minimal Gradio UI: upload audio, click Process, view the diarization
# result rendered as JSON.
with gr.Blocks() as demo:
    audio_input = gr.Audio(label="Upload Audio")
    process_button = gr.Button("Process")
    diarization_output = gr.JSON(label="Diarization Output")
    process_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=diarization_output,
    )

demo.launch()