Spaces:
Configuration error
Configuration error
| import torch | |
| import spaces | |
| import gradio as gr | |
| import os | |
| from pyannote.audio import Pipeline | |
# Build the diarization pipeline once, at import time, so every request
# reuses the same loaded model. The access token is read from the "api"
# environment variable.
try:
    pipeline = Pipeline.from_pretrained(
        "pyannote/speaker-diarization-3.1",
        use_auth_token=os.environ["api"],
    )
    # Run on CUDA when torch reports a usable GPU; otherwise stay on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pipeline.to(device)
except Exception as e:
    # Any failure in the steps above (missing "api" variable, loading the
    # model, moving it to the device) is reported on stdout and leaves
    # `pipeline` as None, which the request handlers check before running.
    print(f"Error initializing pipeline: {e}")
    pipeline = None
def save_audio(audio):
    """Copy the uploaded audio file to a private temporary file.

    Args:
        audio: Filesystem path of the uploaded audio file.

    Returns:
        The path of the temporary copy, or the string
        "Error: Pipeline not initialized" when the pipeline failed to load.

    Raises:
        TypeError: If ``audio`` is None (nothing was uploaded).
        OSError: If the source file cannot be read or the copy cannot be
            written.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"

    # Imported locally so this function does not depend on the module's
    # import block being extended.
    import shutil
    import tempfile

    # Keep the source extension so format detection that relies on the file
    # name still works; fall back to ".wav" when there is none.
    suffix = os.path.splitext(audio)[1] or ".wav"

    # A unique name per call: a fixed "temp.wav" would let two concurrent
    # requests overwrite (or delete) each other's audio.
    fd, temp_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        # Streamed copy; avoids holding the whole upload in memory.
        shutil.copyfile(audio, temp_path)
    except Exception:
        # Do not leave an empty temp file behind when the copy fails.
        os.remove(temp_path)
        raise
    return temp_path
def diarize_audio(temp_file, num_speakers, min_speakers, max_speakers):
    """Run speaker diarization on ``temp_file`` and return it as text.

    The temporary file is deleted once the pipeline has been attempted,
    whether it succeeded or failed.

    Args:
        temp_file: Path of the audio file to process.
        num_speakers: Speaker count to pass to the pipeline; values that are
            None or not positive are omitted.
        min_speakers: Lower bound on the speaker count; omitted when None or
            not positive.
        max_speakers: Upper bound on the speaker count; omitted when None or
            not positive.

    Returns:
        ``str()`` of the pipeline's result, or a string starting with
        "Error" when the pipeline is unavailable or processing failed.
    """
    if pipeline is None:
        return "Error: Pipeline not initialized"

    try:
        requested = (
            ("num_speakers", num_speakers),
            ("min_speakers", min_speakers),
            ("max_speakers", max_speakers),
        )
        params = {}
        for name, value in requested:
            if value is None:
                # An emptied number field arrives as None; treat it as unset.
                continue
            # The UI number inputs may deliver floats; speaker counts are
            # whole numbers, so pass integers to the pipeline.
            count = int(value)
            if count > 0:
                params[name] = count
        diarization = pipeline(temp_file, **params)
    except Exception as e:
        return f"Error processing audio: {e}"
    finally:
        # Always clean up, including on the error path above. Removal is
        # best-effort: a missing or invalid path must not mask the result.
        try:
            os.remove(temp_file)
        except (OSError, TypeError):
            pass

    # Return the diarization output
    return str(diarization)
# Gradio UI: one audio upload, three optional speaker-count hints, and a
# text box that shows the diarization result.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🗣️Pyannote Speaker Diarization 3.1🗣️
    This model takes an audio file as input and outputs the diarization of the speakers in the audio.
    Please upload an audio file and adjust the parameters as needed.
    The maximum length of the audio file it can process is around **35-40 minutes**.
    If you find this space helpful, please ❤ it.
    """)
    audio_input = gr.Audio(type="filepath", label="Upload Audio")
    # A value of 0 leaves the corresponding hint unset (diarize_audio only
    # forwards positive values). precision=0 makes the inputs integers.
    num_speakers_input = gr.Number(
        label="Number of Speakers (exact count, if known; 0 = not set)",
        value=0,
        precision=0,
    )
    min_speakers_input = gr.Number(
        label="Minimum Number of Speakers (lower bound; 0 = not set)",
        value=0,
        precision=0,
    )
    max_speakers_input = gr.Number(
        label="Maximum Number of Speakers (upper bound; 0 = not set)",
        value=0,
        precision=0,
    )
    process_button = gr.Button("Process")
    diarization_output = gr.Textbox(label="Diarization Output")
    # Copy the upload to a temp file first, then diarize that copy.
    process_button.click(
        fn=lambda audio, num_speakers, min_speakers, max_speakers:
            diarize_audio(save_audio(audio), num_speakers, min_speakers, max_speakers),
        inputs=[audio_input, num_speakers_input, min_speakers_input, max_speakers_input],
        outputs=diarization_output,
    )

demo.launch()