import functools
import os

import gradio as gr
import librosa
import whisper
from transformers import pipeline

# Whisper "base" checkpoint, loaded once at import time and reused for every
# request that selects the "base" size.
whisper_model = whisper.load_model("base")

# Maps the label shown in the summarizer dropdown to the Hugging Face model id.
_SUMMARIZER_MODEL_IDS = {
    "BART (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "T5 (t5-small)": "t5-small",
    "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
}


@functools.lru_cache(maxsize=1)
def _load_summarizer(model_id):
    """Build the summarization pipeline for *model_id*, keeping the latest one cached."""
    return pipeline("summarization", model=model_id)


@functools.lru_cache(maxsize=1)
def _load_whisper_checkpoint(model_size):
    """Load a Whisper checkpoint by size name, keeping the latest one cached."""
    return whisper.load_model(model_size)


def _get_whisper_model(model_size):
    """Return the Whisper model for *model_size* without reloading it needlessly."""
    # The "base" checkpoint is already resident in `whisper_model`.
    if model_size == "base":
        return whisper_model
    return _load_whisper_checkpoint(model_size)


# Load traditional summarization models
def get_summarizer(model_name):
    """Return the summarization pipeline for a dropdown label.

    Args:
        model_name: One of the labels in ``_SUMMARIZER_MODEL_IDS``.

    Returns:
        A ``transformers`` summarization pipeline, or ``None`` when
        *model_name* is not a known label.
    """
    model_id = _SUMMARIZER_MODEL_IDS.get(model_name)
    if model_id is None:
        return None
    # Cached so repeated requests with the same model do not rebuild the
    # pipeline on every call.
    return _load_summarizer(model_id)


# Function to transcribe audio file using Whisper
def transcribe_audio(model_size, audio_path):
    """Transcribe the audio file at *audio_path* with a Whisper model.

    Args:
        model_size: Whisper checkpoint name passed to ``whisper.load_model``.
        audio_path: Path of the audio file, or ``None`` when nothing was
            provided.

    Returns:
        The transcribed text, or an explanatory message when *audio_path*
        is ``None`` or does not exist on disk.
    """
    # Debug: Check if the file path is correctly passed
    print(f"Audio file path received: {audio_path}")

    if audio_path is None or not os.path.exists(audio_path):
        return "No audio file provided or file path invalid."

    model = _get_whisper_model(model_size)

    # Decode the file and resample it to 16 kHz; the returned sample rate is
    # not needed afterwards.
    audio_data, _ = librosa.load(audio_path, sr=16000)

    result = model.transcribe(audio_data)
    return result["text"]


# Function to summarize the transcribed text
def summarize_text(transcription, model_name):
    """Summarize *transcription* with the summarizer selected by *model_name*.

    Args:
        transcription: Text to summarize.
        model_name: Dropdown label understood by ``get_summarizer``.

    Returns:
        The summary text, ``"No text to summarize."`` for blank input, or
        ``"Invalid summarization model selected."`` for an unknown label.
    """
    if not transcription.strip():
        return "No text to summarize."

    summarizer = get_summarizer(model_name)
    if not summarizer:
        return "Invalid summarization model selected."

    # truncation=True asks the pipeline to cut inputs that exceed the model's
    # maximum input length instead of failing on long transcripts.
    outputs = summarizer(
        transcription,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]["summary_text"]
# Create a Gradio interface that combines transcription and summarization
def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
    """Transcribe an audio file and summarize the resulting text.

    Args:
        model_size: Whisper checkpoint name forwarded to ``transcribe_audio``.
        summarizer_model: Summarizer label forwarded to ``summarize_text``.
        audio_path: Path of the uploaded audio file, or ``None``.

    Returns:
        A ``(transcription, summary)`` tuple of strings. When no usable audio
        file is supplied, the first element is the message returned by
        ``transcribe_audio`` and the second is ``"No text to summarize."``.
    """
    # Same validity test transcribe_audio applies; evaluated here so its
    # error message is never handed to the summarizer as if it were speech.
    audio_missing = audio_path is None or not os.path.exists(audio_path)

    # Step 1: Transcribe the audio using Whisper
    transcription = transcribe_audio(model_size, audio_path)
    if audio_missing:
        return transcription, "No text to summarize."

    # Step 2: Summarize the transcribed text using the chosen summarizer model
    summary = summarize_text(transcription, summarizer_model)
    return transcription, summary


# Gradio interface for transcription and summarization
iface = gr.Interface(
    fn=combined_transcription_and_summarization,  # The combined function
    inputs=[
        # Whisper model selection
        gr.Dropdown(
            label="Choose Whisper Model",
            choices=["tiny", "base", "small", "medium", "large"],
            value="base",
        ),
        # Summarizer model selection
        gr.Dropdown(
            label="Choose Summarizer Model",
            choices=[
                "BART (facebook/bart-large-cnn)",
                "T5 (t5-small)",
                "Pegasus (google/pegasus-xsum)",
            ],
            value="BART (facebook/bart-large-cnn)",
        ),
        # Audio upload; the handler receives a path on disk
        gr.Audio(type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),  # Output for the transcribed text
        gr.Textbox(label="Summary"),  # Output for the summary
    ],
    title="Whisper Audio Transcription and Summarization",
    description="Upload an audio file, choose a Whisper model for transcription, and a summarization model to summarize the transcription.",
)

# Launch the interface
iface.launch()