pritamdeka committed on
Commit
a62c4d4
·
verified ·
1 Parent(s): d69306e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -15
app.py CHANGED
@@ -1,8 +1,7 @@
1
  import gradio as gr
2
  import whisper
3
  from transformers import pipeline
4
- import librosa
5
- import os
6
 
7
  # Load Whisper model
8
  whisper_model = whisper.load_model("base")
@@ -18,20 +17,18 @@ def get_summarizer(model_name):
18
  else:
19
  return None
20
 
21
- # Function to transcribe audio file using Whisper
22
- def transcribe_audio(model_size, audio_path):
23
- # Debug: Check if the file path is correctly passed
24
- print(f"Audio file path received: {audio_path}")
25
-
26
- if audio_path is None or not os.path.exists(audio_path):
27
- return "No audio file provided or file path invalid."
 
28
 
29
  # Load the selected Whisper model
30
  model = whisper.load_model(model_size)
31
 
32
- # Load and convert audio using librosa
33
- audio_data, sample_rate = librosa.load(audio_path, sr=16000)
34
-
35
  # Transcribe the audio file
36
  result = model.transcribe(audio_data)
37
  transcription = result['text']
@@ -52,9 +49,9 @@ def summarize_text(transcription, model_name):
52
  return "Invalid summarization model selected."
53
 
54
  # Create a Gradio interface that combines transcription and summarization
55
- def combined_transcription_and_summarization(model_size, summarizer_model, audio_path):
56
  # Step 1: Transcribe the audio using Whisper
57
- transcription = transcribe_audio(model_size, audio_path)
58
 
59
  # Step 2: Summarize the transcribed text using the chosen summarizer model
60
  summary = summarize_text(transcription, summarizer_model)
@@ -67,7 +64,7 @@ iface = gr.Interface(
67
  inputs=[
68
  gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"), # Whisper model selection
69
  gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"), # Summarizer model selection
70
- gr.Audio(type="filepath") # Audio upload
71
  ],
72
  outputs=[
73
  gr.Textbox(label="Transcription"), # Output for the transcribed text
 
1
  import gradio as gr
2
  import whisper
3
  from transformers import pipeline
4
+ import numpy as np
 
5
 
6
  # Load Whisper model
7
  whisper_model = whisper.load_model("base")
 
17
  else:
18
  return None
19
 
20
+ # Function to transcribe raw audio data using Whisper
21
+ def transcribe_audio(model_size, audio):
22
+ if audio is None:
23
+ return "No audio file provided."
24
+
25
+ # Convert the input audio (which is a tuple) into the format Whisper expects
26
+ audio_data = np.array(audio[1]) # audio[1] is the raw audio data
27
+ sample_rate = 16000 # Whisper expects a sample rate of 16kHz
28
 
29
  # Load the selected Whisper model
30
  model = whisper.load_model(model_size)
31
 
 
 
 
32
  # Transcribe the audio file
33
  result = model.transcribe(audio_data)
34
  transcription = result['text']
 
49
  return "Invalid summarization model selected."
50
 
51
  # Create a Gradio interface that combines transcription and summarization
52
+ def combined_transcription_and_summarization(model_size, summarizer_model, audio):
53
  # Step 1: Transcribe the audio using Whisper
54
+ transcription = transcribe_audio(model_size, audio)
55
 
56
  # Step 2: Summarize the transcribed text using the chosen summarizer model
57
  summary = summarize_text(transcription, summarizer_model)
 
64
  inputs=[
65
  gr.Dropdown(label="Choose Whisper Model", choices=["tiny", "base", "small", "medium", "large"], value="base"), # Whisper model selection
66
  gr.Dropdown(label="Choose Summarizer Model", choices=["BART (facebook/bart-large-cnn)", "T5 (t5-small)", "Pegasus (google/pegasus-xsum)"], value="BART (facebook/bart-large-cnn)"), # Summarizer model selection
67
+ gr.Audio(type="numpy") # This will pass raw audio data as a numpy array
68
  ],
69
  outputs=[
70
  gr.Textbox(label="Transcription"), # Output for the transcribed text