File size: 3,188 Bytes
2459bb2
 
ae87c60
2a84333
d69306e
2459bb2
ae87c60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec4c5f1
d69306e
 
 
 
 
 
2a84333
8c8114a
2a84333
 
ec4c5f1
2a84333
 
 
ae87c60
2a84333
ae87c60
 
 
 
 
 
 
 
8c8114a
ae87c60
 
 
 
 
2459bb2
ae87c60
ec4c5f1
ae87c60
ec4c5f1
ae87c60
 
 
 
 
 
 
2459bb2
ae87c60
8c8114a
ae87c60
 
 
 
 
 
 
8c8114a
ae87c60
 
2459bb2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import whisper
from transformers import pipeline
import librosa
import os

# Load Whisper model
whisper_model = whisper.load_model("base")

# Load traditional summarization models.
# Map of UI labels -> Hugging Face model checkpoints.
_SUMMARIZER_CHECKPOINTS = {
    "BART (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "T5 (t5-small)": "t5-small",
    "Pegasus (google/pegasus-xsum)": "google/pegasus-xsum",
}
# Cache of constructed pipelines so each checkpoint is loaded from disk only once
# (the original rebuilt the pipeline — a multi-second weight load — on every call).
_summarizer_cache = {}

def get_summarizer(model_name):
    """Return a (cached) summarization pipeline for the given UI label.

    Args:
        model_name: One of the dropdown labels in _SUMMARIZER_CHECKPOINTS.

    Returns:
        A transformers summarization pipeline, or None for an unknown label
        (matching the original if/elif behavior).
    """
    checkpoint = _SUMMARIZER_CHECKPOINTS.get(model_name)
    if checkpoint is None:
        return None
    if model_name not in _summarizer_cache:
        _summarizer_cache[model_name] = pipeline("summarization", model=checkpoint)
    return _summarizer_cache[model_name]

# Function to transcribe audio file using Whisper.
# Cache of loaded Whisper models keyed by size ("tiny", "base", ...) so a model
# is loaded from disk at most once (the original reloaded it on every request).
_whisper_model_cache = {}

def transcribe_audio(model_size, audio_path):
    """Transcribe an audio file with the selected Whisper model.

    Args:
        model_size: Whisper model size, e.g. "tiny", "base", "small", "medium", "large".
        audio_path: Filesystem path to the uploaded audio file (may be None).

    Returns:
        The transcribed text, or an error message string when no valid
        audio path was provided (matching the original behavior).
    """
    # Debug: Check if the file path is correctly passed
    print(f"Audio file path received: {audio_path}")

    if audio_path is None or not os.path.exists(audio_path):
        return "No audio file provided or file path invalid."

    # Load (or reuse) the selected Whisper model.
    if model_size not in _whisper_model_cache:
        _whisper_model_cache[model_size] = whisper.load_model(model_size)
    model = _whisper_model_cache[model_size]

    # Load and resample audio to 16 kHz mono, the rate Whisper expects.
    audio_data, _ = librosa.load(audio_path, sr=16000)

    # Transcribe the audio file.
    result = model.transcribe(audio_data)
    return result['text']

# Function to summarize the transcribed text.
def summarize_text(transcription, model_name):
    """Summarize transcribed text with the chosen summarization model.

    Args:
        transcription: Text produced by the transcription step.
        model_name: UI label of the summarizer to use.

    Returns:
        The summary text, or an explanatory message when the input is
        empty/whitespace or the model label is unknown.
    """
    # Guard: nothing to do for empty or whitespace-only input.
    if not transcription.strip():
        return "No text to summarize."

    summarizer = get_summarizer(model_name)
    if summarizer is None:
        return "Invalid summarization model selected."

    outputs = summarizer(transcription, max_length=150, min_length=30, do_sample=False)
    return outputs[0]['summary_text']

# Create a Gradio interface that combines transcription and summarization.
def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
    """Run the two-stage pipeline: Whisper transcription, then summarization.

    Args:
        model_size: Whisper model size selected in the UI.
        summarizer_model: UI label of the summarization model.
        audio_path: Path to the uploaded audio file.

    Returns:
        Tuple of (transcription, summary) strings for the two output boxes.
    """
    # Stage 1: speech-to-text; stage 2 summarizes whatever stage 1 produced
    # (including its error messages, as in the original).
    transcription = transcribe_audio(model_size, audio_path)
    summary = summarize_text(transcription, summarizer_model)
    return transcription, summary

# Gradio interface for transcription and summarization.
# Inputs map positionally to combined_transcription_and_summarization's
# (model_size, summarizer_model, audio_path) parameters; outputs map to its
# (transcription, summary) return tuple.
iface = gr.Interface(
    fn=combined_transcription_and_summarization,   # The combined function
    inputs=[
        gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"),  # Whisper model selection
        gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"),  # Summarizer model selection; labels must match get_summarizer's expected strings
        gr.Audio(type="filepath")  # Audio upload — "filepath" hands the handler a path string, not raw samples
    ],
    outputs=[
        gr.Textbox(label="Transcription"),         # Output for the transcribed text
        gr.Textbox(label="Summary")                # Output for the summary
    ],
    title="Whisper Audio Transcription and Summarization",
    description="Upload an audio file, choose a Whisper model for transcription, and a summarization model to summarize the transcription."
)

# Launch the interface (blocks here serving the web UI).
iface.launch()