Spaces:

pritamdeka
/

Whisper-Audio-Transcriber-Summarizer

Sleeping

App Files Files Community

pritamdeka committed on Oct 8, 2024

Commit

4206062

verified ·

1 Parent(s): 56925b6

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -19

app.py CHANGED Viewed

@@ -1,36 +1,67 @@
 import gradio as gr
 import whisper
-import os
-# Load Whisper model
-model = whisper.load_model("base")
-# Function to transcribe audio file using Whisper
-def transcribe_audio(audio_file):
-    # Check if the audio file exists and print the file path for debugging
     if audio_file is None:
         return "No audio file provided."
-    # Debugging: Print the file path to check if Gradio passes the file path correctly
-    print(f"Audio file path: {audio_file}")
     if not os.path.exists(audio_file):
         return "The audio file does not exist or is inaccessible."
-    # Load and transcribe the audio file
-    result = model.transcribe(audio_file)
-    transcription = result['text']
     return transcription
-# Gradio interface for transcription
 iface = gr.Interface(
-    fn=transcribe_audio,   # Function to process audio file
-    inputs=gr.Audio(type="filepath"),  # Audio upload, pass file path
-    outputs="text",       # Output the transcription as text
-    title="Whisper Audio Transcription",
-    description="Upload an audio file and get the transcription."
 )
-# Launch the Gradio interface with a shareable link (required for Colab)
 iface.launch()

 import gradio as gr
+import torch
 import whisper
+import librosa
+from transformers import pipeline
+# A distilled Whisper checkpoint from Hugging Face could be used here instead.
+# The name below is only a placeholder; replace it with a real model id before use:
+# distil_whisper_model = "huggingface/distil-whisper-model"
+# Until then, load the small "tiny" Whisper model (chosen for speed) once at import
+# time; transcribe_audio_in_chunks reuses this single instance for every chunk.
+model = whisper.load_model("tiny")
+# Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
+def chunk_audio(audio_file, chunk_size=5):
+    """Load an audio file at 16 kHz and split it into consecutive chunks.
+
+    Args:
+        audio_file: Audio source handed straight to ``librosa.load``
+            (the caller in this file passes a file path).
+        chunk_size: Chunk length in seconds. Must be positive; fractional
+            values such as ``2.5`` are supported.
+
+    Returns:
+        A ``(audio_chunks, sr)`` tuple. ``audio_chunks`` is a list of sample
+        arrays in playback order; every chunk holds ``chunk_size * sr``
+        samples except possibly the last, which holds whatever remains.
+        ``sr`` is the sample rate reported by ``librosa.load``. An empty
+        recording yields an empty list.
+
+    Raises:
+        ValueError: If ``chunk_size`` does not cover at least one sample.
+    """
+    audio, sr = librosa.load(audio_file, sr=16000)
+    # Work in whole samples: slice bounds must be ints, so a fractional
+    # chunk_size has to be converted before it is used as an index.
+    samples_per_chunk = int(chunk_size * sr)
+    if samples_per_chunk <= 0:
+        raise ValueError("chunk_size must be a positive number of seconds")
+    # Striding by the chunk length produces every full chunk and, when the
+    # audio length is not an exact multiple, the shorter trailing chunk too.
+    audio_chunks = [
+        audio[start:start + samples_per_chunk]
+        for start in range(0, len(audio), samples_per_chunk)
+    ]
+    return audio_chunks, sr
+# Function to transcribe the audio in chunks using Whisper
+def transcribe_audio_in_chunks(audio_file):
+    """Transcribe an audio file with Whisper, one 5-second chunk at a time.
+
+    Args:
+        audio_file: Path to the uploaded audio file, or ``None`` when nothing
+            was uploaded.
+
+    Returns:
+        A single string. On success it contains one line per chunk in the form
+        ``"Chunk <n>: <text>"`` (1-based, each line newline-terminated); it is
+        empty when the audio yields no chunks. When the input is missing or
+        the path does not exist, a human-readable error message is returned
+        instead of raising.
+    """
+    # Imported here because this version of app.py no longer imports ``os`` at
+    # module level; without it the existence check below raises NameError.
+    import os
+
     if audio_file is None:
         return "No audio file provided."
+    # Check the audio file path
     if not os.path.exists(audio_file):
         return "The audio file does not exist or is inaccessible."
+    # Chunk the audio into 5-second parts (the sample rate is not needed here)
+    chunks, _ = chunk_audio(audio_file, chunk_size=5)
+    # Each chunk is passed to the shared Whisper model as an in-memory sample
+    # array; the labelled lines are joined once instead of growing a string.
+    transcription = "".join(
+        f"Chunk {index}: {model.transcribe(chunk)['text']}\n"
+        for index, chunk in enumerate(chunks, start=1)
+    )
     return transcription
+# Gradio interface: one uploaded audio file in, one block of chunk-labelled text out.
+# NOTE(review): the description below says "real-time", but fn returns a single
+# string only after every chunk has been transcribed — confirm the wording is intended.
 iface = gr.Interface(
+    fn=transcribe_audio_in_chunks,  # Function that receives the uploaded audio and returns the text
+    inputs=gr.Audio(type="filepath"),  # Audio upload, passed to fn as a file path
+    outputs="text",  # The transcription string returned by fn, shown as plain text
+    title="Whisper Audio Transcription with Chunking",
+    description="Upload an audio file, and Whisper will transcribe it in real-time as chunks."
 )
+# Start the Gradio app. No share link is requested here; pass share=True to launch() (e.g. on Colab) if one is needed.
 iface.launch()