import os

import gradio as gr
import spaces
from pyannote.audio import Pipeline

# Instantiate the diarization pipeline; the Hugging Face access token is read
# from the "api" environment variable (e.g. a Space secret).
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=os.environ["api"],
    )
except Exception as e:
    print(f"Error initializing pipeline: {e}")
    pipeline = None


@spaces.GPU
def process_audio(audio, num_speakers, min_speakers, max_speakers):
    if pipeline is None:
        return "Error: Pipeline not initialized"
    if audio is None:
        return "Error: No audio file provided"

    # Copy the uploaded audio file to a temporary location.
    with open(audio, "rb") as f:
        audio_data = f.read()
    with open("temp.wav", "wb") as f:
        f.write(audio_data)

    # Run the diarization pipeline, passing only the speaker-count
    # constraints the user actually set (0 means "unset").
    try:
        params = {}
        if num_speakers > 0:
            params["num_speakers"] = int(num_speakers)
        if min_speakers > 0:
            params["min_speakers"] = int(min_speakers)
        if max_speakers > 0:
            params["max_speakers"] = int(max_speakers)
        diarization = pipeline("temp.wav", **params)
    except Exception as e:
        return f"Error processing audio: {e}"
    finally:
        # Remove the temporary file whether or not processing succeeded.
        if os.path.exists("temp.wav"):
            os.remove("temp.wav")

    # Return the diarization output as text.
    return str(diarization)


with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    num_speakers_input = gr.Number(label="Number of Speakers", value=0)
    min_speakers_input = gr.Number(label="Minimum Number of Speakers", value=0)
    max_speakers_input = gr.Number(label="Maximum Number of Speakers", value=0)
    process_button = gr.Button("Process")
    diarization_output = gr.Textbox(label="Diarization Output")

    process_button.click(
        fn=process_audio,
        inputs=[audio_input, num_speakers_input, min_speakers_input, max_speakers_input],
        outputs=diarization_output,
    )

demo.launch()