File size: 3,017 Bytes
2459bb2 ae87c60 2a84333 2459bb2 ae87c60 ec4c5f1 2a84333 8c8114a 2a84333 ec4c5f1 2a84333 ae87c60 2a84333 ae87c60 8c8114a ae87c60 2459bb2 ae87c60 ec4c5f1 ae87c60 ec4c5f1 ae87c60 2459bb2 ae87c60 8c8114a ae87c60 8c8114a ae87c60 2459bb2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import functools

import gradio as gr
import librosa
import whisper
from transformers import pipeline
# Load a default Whisper model once at import time.
# NOTE(review): this module-level model appears unused — transcribe_audio()
# loads its own model per call. Confirm no external caller relies on
# `whisper_model` before removing it.
whisper_model = whisper.load_model("base")
# Display label (as shown in the Gradio dropdown) -> Hugging Face checkpoint.
_SUMMARIZER_CHECKPOINTS = {
    "BART (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "T5 (t5-small)": "t5-small",
    "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
}


@functools.lru_cache(maxsize=None)  # key space is the fixed dropdown choices, so unbounded cache is safe
def get_summarizer(model_name):
    """Return a summarization pipeline for the given dropdown label.

    The pipeline is memoized so repeated requests reuse the already-loaded
    model instead of re-downloading/re-initializing it on every call.

    Args:
        model_name: Display label selected in the UI (see _SUMMARIZER_CHECKPOINTS).

    Returns:
        A transformers summarization pipeline, or None if the label is unknown.
    """
    checkpoint = _SUMMARIZER_CHECKPOINTS.get(model_name)
    if checkpoint is None:
        return None
    return pipeline("summarization", model=checkpoint)
@functools.lru_cache(maxsize=None)  # key space is the fixed size choices ("tiny".."large")
def _load_whisper_model(model_size):
    """Load and memoize a Whisper model so repeat calls don't reload weights."""
    return whisper.load_model(model_size)


def transcribe_audio(model_size, audio_path):
    """Transcribe an audio file with the selected Whisper model.

    Args:
        model_size: Whisper model size label ("tiny", "base", "small", ...).
        audio_path: Filesystem path to the uploaded audio, or None.

    Returns:
        The transcribed text, or an error message string when no file was given.
    """
    if audio_path is None:
        return "No audio file provided."
    model = _load_whisper_model(model_size)
    # Resample to 16 kHz mono float array — the rate Whisper expects.
    audio_data, _ = librosa.load(audio_path, sr=16000)
    result = model.transcribe(audio_data)
    return result['text']
def summarize_text(transcription, model_name):
    """Summarize transcribed text with the selected summarization model.

    Args:
        transcription: Text to summarize; may be None, empty, or whitespace.
        model_name: Dropdown label identifying the summarizer (see get_summarizer).

    Returns:
        The summary text, or an explanatory message when there is nothing to
        summarize or the model label is unknown.
    """
    # Guard against None as well as empty/whitespace-only input; the original
    # crashed with AttributeError on None.
    if not transcription or not transcription.strip():
        return "No text to summarize."
    summarizer = get_summarizer(model_name)
    if summarizer is None:
        return "Invalid summarization model selected."
    result = summarizer(transcription, max_length=150, min_length=30, do_sample=False)
    return result[0]['summary_text']
def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
    """Transcribe an audio file with Whisper, then summarize the transcript.

    Args:
        model_size: Whisper model size label for transcription.
        summarizer_model: Dropdown label of the summarization model.
        audio_path: Filesystem path to the uploaded audio, or None.

    Returns:
        A (transcription, summary) pair of strings for the two output boxes.
    """
    text = transcribe_audio(model_size, audio_path)
    return text, summarize_text(text, summarizer_model)
# Gradio interface wiring: two model-selection dropdowns plus an audio upload
# feed the combined pipeline; the two textboxes receive its (transcription,
# summary) return pair in order.
iface = gr.Interface(
fn=combined_transcription_and_summarization,  # The combined function
inputs=[
gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"),  # Whisper model selection
gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"),  # Summarizer model selection
gr.Audio(type="filepath")  # Audio upload; hands the function a temp-file path
],
outputs=[
gr.Textbox(label="Transcription"),  # Output for the transcribed text
gr.Textbox(label="Summary")  # Output for the summary
],
title="Whisper Audio Transcription and Summarization",
description="Upload an audio file, choose a Whisper model for transcription, and a summarization model to summarize the transcription."
)
# Launch the interface.
# NOTE(review): launches unconditionally at import time; consider wrapping in
# `if __name__ == "__main__":` so the module can be imported without starting
# a server.
iface.launch()
|