# Page header captured together with this file (not part of the program):
# Delik's picture | Update app.py | a68a1ac verified | raw | history blame | 1.2 kB
import io
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import wavio
from pyannote.audio import Audio
from pyannote.audio import Pipeline
from pyannote.core import Segment
# Load the pretrained speaker-diarization pipeline once, at import time, so
# every request reuses the same model. The access token is read from the
# "api" environment variable; a missing variable raises KeyError here.
hf_token = os.environ['api']
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=hf_token,
)
def _to_int16_pcm(samples):
    """Peak-normalise *samples* and return them as 16-bit PCM."""
    # Work in float64: np.abs() on an int16 array overflows for -32768, which
    # would corrupt the peak estimate.
    samples = np.asarray(samples, dtype=np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    # Silent (all-zero) audio has no peak to scale by; leave it as zeros
    # instead of dividing by zero.
    if peak > 0:
        samples = samples / peak * 32767
    return samples.astype(np.int16)


def process_audio(audio):
    """Run speaker diarization on audio delivered by the Gradio Audio input.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by ``gr.Audio``
            in its numpy mode, or ``None`` when nothing was uploaded.

    Returns:
        ``None`` when no audio was supplied; otherwise a JSON-serialisable
        list of ``{"start": float, "end": float, "speaker": str}`` dicts, one
        per speaker turn (empty when the audio contains no samples).
    """
    # Nothing uploaded yet: return an empty result instead of failing on
    # tuple unpacking.
    if audio is None:
        return None

    # Gradio hands over the sample rate first and the sample array second.
    sample_rate, audio_data = audio
    if np.asarray(audio_data).size == 0:
        return []

    pcm = _to_int16_pcm(audio_data)

    # A private temporary directory keeps concurrent requests from clobbering
    # one another's file and is removed even if the pipeline raises.
    with tempfile.TemporaryDirectory() as work_dir:
        wav_path = os.path.join(work_dir, "input.wav")
        wavio.write(wav_path, pcm, sample_rate, sampwidth=2)
        diarization = pipeline(wav_path)

    # NOTE(review): assumes the pipeline returns a pyannote.core.Annotation
    # (pyannote.audio 3.x) - confirm against the installed version.
    # The annotation object is not JSON data, so flatten it into plain types
    # that the gr.JSON output component can render.
    return [
        {
            "start": float(turn.start),
            "end": float(turn.end),
            "speaker": str(speaker),
        }
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]
# Build the web UI: an audio upload, a button, and a JSON panel for results.
with gr.Blocks() as demo:
    audio_input = gr.Audio(label="Upload Audio")
    process_button = gr.Button("Process")
    diarization_output = gr.JSON(label="Diarization Output")

    # Clicking the button sends the uploaded audio through process_audio and
    # shows whatever it returns in the JSON panel.
    process_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=diarization_output,
    )

# Start the Gradio server.
demo.launch()