File size: 1,302 Bytes
f30c373
7c9216a
18e78ec
 
f364821
4fa14c8
18e78ec
 
 
ba685bf
1c57ed2
 
 
 
8289149
f364821
 
5eede9e
a68a1ac
 
 
fd6daba
 
 
76efec6
4fa14c8
76efec6
8289149
 
76efec6
 
8289149
db1ee1f
 
 
1429210
db1ee1f
8289149
2129f6b
db1ee1f
1429210
db1ee1f
2129f6b
d73007a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import io
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import wavio
from pyannote.audio import Audio, Pipeline
from pyannote.core import Segment

# Load the pretrained speaker-diarization pipeline from the Hugging Face Hub.
# The model is gated, so an access token must be supplied via the "api"
# environment variable; os.environ['api'] raises KeyError at startup if the
# variable is unset. NOTE(review): this downloads model weights on first run —
# requires network access at process start.
pipeline = Pipeline.from_pretrained(
  "pyannote/speaker-diarization-3.1",
  use_auth_token=os.environ['api'])

def process_audio(audio):
    """Run speaker diarization on audio captured by the Gradio Audio widget.

    Parameters:
        audio: Tuple of ``(sample_rate, data)`` as produced by ``gr.Audio``
            with numpy output — an int sample rate and a numpy array of
            samples (1-D for mono, ``(frames, channels)`` for multi-channel).
            May be ``None`` if the user pressed Process without recording.

    Returns:
        A JSON-serializable list of segments, each
        ``{"start": float, "end": float, "speaker": str}``, suitable for
        display in the ``gr.JSON`` output component.

    Raises:
        gr.Error: If no audio was provided.
    """
    if audio is None:
        raise gr.Error("Please record or upload an audio clip first.")

    # BUG FIX: gr.Audio yields (sample_rate, data), not (data, sample_rate).
    # The original unpacking order was swapped.
    sample_rate, audio_data = audio
    audio_data = np.asarray(audio_data)

    # Normalize to full-scale int16; guard the divide so an all-zero
    # (silent) clip doesn't trigger a division-by-zero.
    peak = np.max(np.abs(audio_data))
    if peak > 0:
        audio_data = np.int16(audio_data / peak * 32767)
    else:
        audio_data = np.zeros(audio_data.shape, dtype=np.int16)

    # wavio.write expects a 2-D (frames, channels) array; only add the
    # channel axis for mono input — stereo data is already 2-D and the
    # unconditional np.newaxis of the original produced a bad 3-D shape.
    if audio_data.ndim == 1:
        audio_data = audio_data[:, np.newaxis]

    # Use a unique temporary file instead of a hard-coded "temp.wav" so
    # concurrent requests don't clobber each other, and clean it up in a
    # finally block so it is removed even if the pipeline raises.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        wavio.write(tmp.name, audio_data, sample_rate, sampwidth=2)
        diarization = pipeline(tmp.name)
    finally:
        os.remove(tmp.name)

    # pyannote's Annotation object is not JSON-serializable; flatten it
    # into plain dicts for the gr.JSON output component.
    return [
        {"start": turn.start, "end": turn.end, "speaker": speaker}
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    ]

# Minimal Gradio UI: an audio recorder/uploader, a trigger button, and a
# JSON viewer wired together so the button runs diarization on the clip.
with gr.Blocks() as demo:
    clip = gr.Audio(label="Upload Audio")
    run_btn = gr.Button("Process")
    result_view = gr.JSON(label="Diarization Output")

    run_btn.click(
        fn=process_audio,
        inputs=clip,
        outputs=result_view,
    )

demo.launch()
#