File size: 3,017 Bytes
2459bb2
 
ae87c60
2a84333
2459bb2
ae87c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec4c5f1
 
2a84333
 
 
8c8114a
2a84333
 
ec4c5f1
2a84333
 
 
ae87c60
2a84333
ae87c60
 
 
 
 
 
 
 
8c8114a
ae87c60
 
 
 
 
2459bb2
ae87c60
ec4c5f1
ae87c60
ec4c5f1
ae87c60
 
 
 
 
 
 
2459bb2
ae87c60
8c8114a
ae87c60
 
 
 
 
 
 
8c8114a
ae87c60
 
2459bb2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import functools

import gradio as gr
import librosa
import whisper
from transformers import pipeline

# Load Whisper model
whisper_model = whisper.load_model("base")

# Load traditional summarization models
def get_summarizer(model_name):
    if model_name == "BART (facebook/bart-large-cnn)":
        return pipeline("summarization", model="facebook/bart-large-cnn")
    elif model_name == "T5 (t5-small)":
        return pipeline("summarization", model="t5-small")
    elif model_name == "Pegasus (google/pegasus-xsum)":
        return pipeline("summarization", model="google/pegasus-xsum")
    else:
        return None

# Function to transcribe audio file using Whisper
def transcribe_audio(model_size, audio_path):
    if audio_path is None:
        return "No audio file provided."
    
    # Load the selected Whisper model
    model = whisper.load_model(model_size)

    # Load and convert audio using librosa
    audio_data, sample_rate = librosa.load(audio_path, sr=16000)

    # Transcribe the audio file
    result = model.transcribe(audio_data)
    transcription = result['text']
    
    return transcription

# Function to summarize the transcribed text
def summarize_text(transcription, model_name):
    if len(transcription.strip()) == 0:
        return "No text to summarize."
    
    summarizer = get_summarizer(model_name)
    
    if summarizer:
        summary = summarizer(transcription, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
        return summary
    else:
        return "Invalid summarization model selected."

# Create a Gradio interface that combines transcription and summarization
def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
    # Step 1: Transcribe the audio using Whisper
    transcription = transcribe_audio(model_size, audio_path)
    
    # Step 2: Summarize the transcribed text using the chosen summarizer model
    summary = summarize_text(transcription, summarizer_model)
    
    return transcription, summary

# Gradio interface for transcription and summarization
iface = gr.Interface(
    fn=combined_transcription_and_summarization,   # The combined function
    inputs=[
        gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"),  # Whisper model selection
        gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"),  # Summarizer model selection
        gr.Audio(type="filepath")  # Audio upload
    ],
    outputs=[
        gr.Textbox(label="Transcription"),         # Output for the transcribed text
        gr.Textbox(label="Summary")                # Output for the summary
    ],
    title="Whisper Audio Transcription and Summarization",
    description="Upload an audio file, choose a Whisper model for transcription, and a summarization model to summarize the transcription."
)

# Launch the interface
iface.launch()