|
import gradio as gr |
|
import whisper |
|
from transformers import pipeline |
|
|
|
|
|
# Eagerly load the "base" Whisper checkpoint at import time.
# NOTE(review): none of the functions visible in this file read this global;
# transcribe_audio loads the model it needs itself. Kept so the module-level
# name stays available to any external code that may rely on it.
whisper_model = whisper.load_model("base")
|
|
|
|
|
# Maps each UI label to the Hugging Face checkpoint it selects.
_SUMMARIZER_CHECKPOINTS = {
    "BART (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "T5 (t5-small)": "t5-small",
    "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
}

# Holds at most one built pipeline, keyed by checkpoint name, so repeated
# requests with the same model do not reload the weights while memory use
# stays bounded to a single summarizer.
_SUMMARIZER_CACHE = {}


def get_summarizer(model_name):
    """Return a summarization pipeline for the given UI label.

    Args:
        model_name: One of the labels in ``_SUMMARIZER_CHECKPOINTS``.

    Returns:
        A ``transformers`` summarization pipeline, or ``None`` when
        ``model_name`` is not a recognised label.
    """
    try:
        checkpoint = _SUMMARIZER_CHECKPOINTS.get(model_name)
    except TypeError:
        # Unhashable values can never match a label; treat them as unknown,
        # as the original equality checks did.
        return None
    if checkpoint is None:
        return None

    summarizer = _SUMMARIZER_CACHE.get(checkpoint)
    if summarizer is None:
        summarizer = pipeline("summarization", model=checkpoint)
        # Drop any previously cached pipeline before storing the new one.
        _SUMMARIZER_CACHE.clear()
        _SUMMARIZER_CACHE[checkpoint] = summarizer
    return summarizer
|
|
|
|
|
# Holds at most one loaded Whisper model, keyed by model size, so repeated
# requests with the same size skip the reload while only one checkpoint is
# kept resident by this function.
_WHISPER_MODEL_CACHE = {}


def transcribe_audio(model_size, audio):
    """Transcribe an audio input with the requested Whisper model.

    Args:
        model_size: Whisper model name passed to ``whisper.load_model``.
        audio: Audio input forwarded to ``model.transcribe`` (the UI in this
            file supplies a file path).

    Returns:
        The transcribed text, or an empty string when no audio was supplied
        (``None`` or a blank path), so callers can report "nothing to
        summarize" instead of crashing inside Whisper.
    """
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return ""

    model = _WHISPER_MODEL_CACHE.get(model_size)
    if model is None:
        model = whisper.load_model(model_size)
        # Replace whatever size was cached before to bound memory use.
        _WHISPER_MODEL_CACHE.clear()
        _WHISPER_MODEL_CACHE[model_size] = model

    return model.transcribe(audio)["text"]
|
|
|
|
|
def summarize_text(transcription, model_name):
    """Summarize a transcription with the selected summarization model.

    Args:
        transcription: Text to summarize; ``None`` or blank text is accepted.
        model_name: UI label forwarded to ``get_summarizer``.

    Returns:
        The generated summary, ``"No text to summarize."`` when the input is
        missing or blank, or ``"Invalid summarization model selected."`` when
        ``get_summarizer`` returns ``None``.
    """
    if not transcription or not transcription.strip():
        return "No text to summarize."

    summarizer = get_summarizer(model_name)
    if summarizer is None:
        return "Invalid summarization model selected."

    # truncation=True clips inputs longer than the model's maximum input
    # length instead of failing on long transcripts.
    results = summarizer(
        transcription,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return results[0]["summary_text"]
|
|
|
|
|
def combined_transcription_and_summarization(model_size, summarizer_model, audio):
    """Transcribe the audio, then summarize the resulting text.

    Args:
        model_size: Whisper model name forwarded to ``transcribe_audio``.
        summarizer_model: Summarizer label forwarded to ``summarize_text``.
        audio: Audio input forwarded to ``transcribe_audio``.

    Returns:
        A ``(transcription, summary)`` tuple.
    """
    text = transcribe_audio(model_size, audio)
    return text, summarize_text(text, summarizer_model)
|
|
|
|
|
# Gradio UI: two dropdowns (Whisper size, summarizer) plus an audio input,
# wired to the combined transcribe-then-summarize function; the two text
# boxes receive its (transcription, summary) return value in order.
iface = gr.Interface(
    title="Whisper Audio Transcription and Summarization",
    description=(
        "Upload an audio file, choose a Whisper model for transcription, "
        "and a summarization model to summarize the transcription."
    ),
    fn=combined_transcription_and_summarization,
    inputs=[
        gr.Dropdown(
            choices=["tiny", "base", "small", "medium", "large"],
            value="base",
            label="Choose Whisper Model",
        ),
        gr.Dropdown(
            choices=[
                "BART (facebook/bart-large-cnn)",
                "T5 (t5-small)",
                "Pegasus (google/pegasus-xsum)",
            ],
            value="BART (facebook/bart-large-cnn)",
            label="Choose Summarizer Model",
        ),
        # type="filepath" is passed so the handler gets a path, not raw samples.
        gr.Audio(type="filepath"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
)

# Runs at module level, so the app is launched whenever this file is executed.
iface.launch()
|
|