# Hugging Face Space: speaker-diarization demo (pyannote 3.1, runs on ZeroGPU).
import io
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import wavio
from pyannote.audio import Audio
from pyannote.audio import Pipeline
from pyannote.core import Segment
# Load the pretrained speaker-diarization pipeline from the Hugging Face Hub.
# The model is gated, so an access token is required; it is read from the
# "api" environment variable (raises KeyError at import time if unset).
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ['api'])
def process_audio(audio):
    """Run speaker diarization on audio captured by a ``gr.Audio`` input.

    Args:
        audio: Tuple of ``(sample_rate, data)`` as produced by ``gr.Audio``
            in numpy mode — ``sample_rate`` is an int, ``data`` is a 1-D
            (mono) or 2-D ``(samples, channels)`` numpy array.

    Returns:
        The diarization result produced by the module-level ``pipeline``
        (a ``pyannote.core.Annotation`` of speaker turns).
    """
    # gr.Audio (numpy mode) yields (sample_rate, data) — the original code
    # unpacked these in the opposite, incorrect order.
    sample_rate, audio_data = audio
    audio_data = np.asarray(audio_data)
    # Normalize to 16-bit PCM; guard against all-zero (silent) input to
    # avoid a division by zero in the scaling step.
    peak = np.max(np.abs(audio_data)) if audio_data.size else 0
    if peak > 0:
        audio_data = np.int16(audio_data / peak * 32767)
    else:
        audio_data = np.zeros_like(audio_data, dtype=np.int16)
    # wavio expects a 2-D (samples, channels) array; stereo input is
    # already 2-D, mono needs an explicit channel axis.
    if audio_data.ndim == 1:
        audio_data = audio_data[:, np.newaxis]
    # Use a unique temp file so concurrent requests don't clobber each
    # other, and always clean it up even if the pipeline raises.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        wavio.write(wav_path, audio_data, sample_rate, sampwidth=2)
        # Run the speaker-diarization pipeline on the temporary WAV file.
        diarization = pipeline(wav_path)
    finally:
        os.remove(wav_path)
    return diarization
# Assemble the Gradio UI: an audio input, a trigger button, and a JSON
# panel showing the diarization result, then start the server.
with gr.Blocks() as demo:
    uploaded_audio = gr.Audio(label="Upload Audio")
    run_button = gr.Button("Process")
    result_view = gr.JSON(label="Diarization Output")
    run_button.click(
        fn=process_audio,
        inputs=uploaded_audio,
        outputs=result_view,
    )
demo.launch()
# |