Commit: "Update app.py" — diff view for file app.py (changed).
@@ -1,8 +1,7 @@   (old-file pane)
 1 |  import gradio as gr
 2 |  import whisper
 3 |  from transformers import pipeline
 4 | -import
 5 | -import os
 6 |
 7 |  # Load Whisper model
 8 |  whisper_model = whisper.load_model("base")
|
@@ -18,20 +17,18 @@ def get_summarizer(model_name):   (old-file pane)
 18 |      else:
 19 |          return None
 20 |
 21 | -# Function to transcribe audio
 22 | -def transcribe_audio(model_size, …          [rest of line lost in capture]
 23 | -…                                           [removed lines 23–27 lost in capture]
 24 | -…
 25 | -…
 26 | -…
 27 | -…
 28 |
 29 |      # Load the selected Whisper model
 30 |      model = whisper.load_model(model_size)
 31 |
 32 | -    # Load and convert audio using librosa
 33 | -    audio_data, sample_rate = librosa.load(audio_path, sr=16000)
 34 | -
 35 |      # Transcribe the audio file
 36 |      result = model.transcribe(audio_data)
 37 |      transcription = result['text']
|
@@ -52,9 +49,9 @@ def summarize_text(transcription, model_name):   (old-file pane)
 52 |      return "Invalid summarization model selected."
 53 |
 54 |  # Create a Gradio interface that combines transcription and summarization
 55 | -def combined_transcription_and_summarization(model_size, summarizer_model, …   [rest of line lost in capture]
 56 |      # Step 1: Transcribe the audio using Whisper
 57 | -    transcription = transcribe_audio(model_size, …                             [rest of line lost in capture]
 58 |
 59 |      # Step 2: Summarize the transcribed text using the chosen summarizer model
 60 |      summary = summarize_text(transcription, summarizer_model)
|
@@ -67,7 +64,7 @@ iface = gr.Interface(   (old-file pane)
 67 |      inputs=[
 68 |          gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"),  # Whisper model selection
 69 |          gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"),  # Summarizer model selection
 70 | -        gr.Audio(type="…                     [rest of line lost in capture]
 71 |      ],
 72 |      outputs=[
 73 |          gr.Textbox(label="Transcription"),  # Output for the transcribed text
|
|
|
   (new-file pane)
 1 |  import gradio as gr
 2 |  import whisper
 3 |  from transformers import pipeline
 4 | +import numpy as np
 5 |
 6 |  # Load Whisper model
 7 |  whisper_model = whisper.load_model("base")
|
|
|
   (new-file pane)
 17 |      else:
 18 |          return None
 19 |
 20 | +# Function to transcribe raw audio data using Whisper
 21 | +def transcribe_audio(model_size, audio):
 22 | +    if audio is None:
 23 | +        return "No audio file provided."
 24 | +
 25 | +    # Convert the input audio (which is a tuple) into the format Whisper expects
 26 | +    audio_data = np.array(audio[1])  # audio[1] is the raw audio data
 27 | +    sample_rate = 16000  # Whisper expects a sample rate of 16kHz
 28 |
 29 |      # Load the selected Whisper model
 30 |      model = whisper.load_model(model_size)
 31 |
 32 |      # Transcribe the audio file
 33 |      result = model.transcribe(audio_data)
 34 |      transcription = result['text']
|
|
|
   (new-file pane)
 49 |      return "Invalid summarization model selected."
 50 |
 51 |  # Create a Gradio interface that combines transcription and summarization
 52 | +def combined_transcription_and_summarization(model_size, summarizer_model, audio):
 53 |      # Step 1: Transcribe the audio using Whisper
 54 | +    transcription = transcribe_audio(model_size, audio)
 55 |
 56 |      # Step 2: Summarize the transcribed text using the chosen summarizer model
 57 |      summary = summarize_text(transcription, summarizer_model)
|
|
|
   (new-file pane)
 64 |      inputs=[
 65 |          gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"),  # Whisper model selection
 66 |          gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"),  # Summarizer model selection
 67 | +        gr.Audio(type="numpy")  # This will pass raw audio data as a numpy array
 68 |      ],
 69 |      outputs=[
 70 |          gr.Textbox(label="Transcription"),  # Output for the transcribed text
|