Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,813 Bytes
93d373e 090acab f30c373 7c9216a 18e78ec ba685bf aad12fa fc03567 93d373e fc03567 1c57ed2 9946e7a 7180a69 fc03567 9946e7a 76efec6 0d076a1 9946e7a 7180a69 89d654f 7180a69 0d076a1 fc03567 ce1f6bf 7180a69 fc03567 0d076a1 76efec6 7180a69 0d076a1 db1ee1f aad12fa 1429210 db1ee1f a599ac3 ff42726 a599ac3 2129f6b 0d076a1 1429210 7180a69 2129f6b 0d076a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import torch
import spaces
import gradio as gr
import os
from pyannote.audio import Pipeline
# Load the pyannote speaker-diarization pipeline once, at import time.
# Any failure in this section (missing "api" environment variable, an error
# while loading the model, or an error while moving it to the device) is
# printed and leaves `pipeline` set to None, which the request handlers
# check before doing any work.
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=os.environ["api"],
    )
    # Run on CUDA when torch reports it as available, otherwise on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    pipeline.to(device)
except Exception as e:
    print(f"Error initializing pipeline: {e}")
    pipeline = None
def save_audio(audio):
    """Copy the uploaded audio file to a private temporary file.

    Args:
        audio: Filesystem path of the uploaded audio file.

    Returns:
        The path of the temporary copy, or the string
        "Error: Pipeline not initialized" when the module-level ``pipeline``
        is None (no file is created in that case).

    Raises:
        TypeError: If ``audio`` is not a path (for example None).
        OSError: If the source cannot be read or the copy cannot be written.
    """
    import shutil
    import tempfile

    if pipeline is None:
        return "Error: Pipeline not initialized"

    # Keep the upload's own extension so the temporary copy is not mislabelled
    # as WAV when it is something else; fall back to ".wav" when there is none.
    suffix = os.path.splitext(audio)[1] or ".wav"

    # A unique file per call: a fixed name such as "temp.wav" would let
    # concurrent requests overwrite (and later delete) each other's audio.
    # The source is opened first so that no temporary file is created when
    # the upload cannot be read. delete=False because the caller removes it.
    with open(audio, "rb") as src, tempfile.NamedTemporaryFile(
        "wb", suffix=suffix, delete=False
    ) as dst:
        # Stream the bytes instead of loading the whole upload into memory.
        shutil.copyfileobj(src, dst)
    return dst.name
@spaces.GPU(duration=90)
def diarize_audio(temp_file, num_speakers, min_speakers, max_speakers):
    """Run the diarization pipeline on ``temp_file`` and return it as text.

    Args:
        temp_file: Path of the audio file to process. The file is deleted
            once processing has finished, whether it succeeded or failed.
        num_speakers: Value forwarded as ``num_speakers`` when positive.
        min_speakers: Value forwarded as ``min_speakers`` when positive.
        max_speakers: Value forwarded as ``max_speakers`` when positive.
            For all three, None, zero and negative values mean "not set".

    Returns:
        ``str()`` of the pipeline output on success, otherwise a string
        starting with "Error" that describes what went wrong.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"
    try:
        params = {}
        requested = (
            ("num_speakers", num_speakers),
            ("min_speakers", min_speakers),
            ("max_speakers", max_speakers),
        )
        for key, value in requested:
            # An emptied number field arrives as None: treat it as "not set".
            if value is None:
                continue
            # Number fields deliver floats; speaker counts must be integers.
            count = int(value)
            if count > 0:
                params[key] = count
        diarization = pipeline(temp_file, **params)
    except Exception as e:
        return f"Error processing audio: {e}"
    finally:
        # Remove the temporary file on failure as well as on success so that
        # failed requests do not leak files. Cleanup is best-effort: a missing
        # file or a non-path input must not replace the result or the error
        # message being returned.
        try:
            os.remove(temp_file)
        except (OSError, TypeError):
            pass
    # Return the diarization output
    return str(diarization)
# Introductory Markdown shown at the top of the page.
_DESCRIPTION = """
# 🗣️Pyannote Speaker Diarization 3.1🗣️
This model takes an audio file as input and outputs the diarization of the speakers in the audio.
Please upload an audio file and adjust the parameters as needed.
The maximum length of the audio file it can process is around **35-40 minutes**.
If you find this space helpful, please ❤ it.
"""


def _process_upload(audio, num_speakers, min_speakers, max_speakers):
    """Click handler: copy the upload to a temporary file and diarize it."""
    # Without an upload there is no path to open; report that in the output
    # box instead of failing inside save_audio.
    if audio is None:
        return "Error: No audio file provided"
    return diarize_audio(
        save_audio(audio), num_speakers, min_speakers, max_speakers
    )


# Build the UI: one audio upload, three optional speaker-count hints
# (0 means "not set"), a button and a text box for the result.
with gr.Blocks() as demo:
    gr.Markdown(_DESCRIPTION)
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    # precision=0 makes the fields produce integers rather than floats.
    num_speakers_input = gr.Number(
        label="Number of Speakers (exact number of speakers, if known; 0 = not set)",
        value=0,
        precision=0,
    )
    min_speakers_input = gr.Number(
        label="Minimum Number of Speakers (lower bound on the number of speakers; 0 = not set)",
        value=0,
        precision=0,
    )
    max_speakers_input = gr.Number(
        label="Maximum Number of Speakers (upper bound on the number of speakers; 0 = not set)",
        value=0,
        precision=0,
    )
    process_button = gr.Button("Process")
    diarization_output = gr.Textbox(label="Diarization Output")
    process_button.click(
        fn=_process_upload,
        inputs=[
            audio_input,
            num_speakers_input,
            min_speakers_input,
            max_speakers_input,
        ],
        outputs=diarization_output,
    )

demo.launch()