File size: 2,198 Bytes
2459bb2 4206062 4eaea04 2459bb2 4eaea04 4206062 d9cee8f a62c4d4 4206062 2a84333 4206062 4eaea04 4206062 2a84333 ae87c60 4206062 2459bb2 4206062 2459bb2 4206062 56925b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import gradio as gr
import whisper
import librosa
import numpy as np
# Load the Whisper "tiny" checkpoint once, at import time, so that every
# transcription request reuses the same model object instead of reloading it.
# "tiny" is chosen for faster performance.
model = whisper.load_model("tiny")
# Chunking function to split the audio into smaller parts (e.g., 5-second chunks)
def chunk_audio(audio_file, chunk_size=5):
    """Load an audio file and split it into consecutive fixed-length chunks.

    The audio is loaded through ``librosa.load`` with ``sr=16000`` and cut
    into back-to-back slices of ``chunk_size`` seconds. A shorter final slice
    is kept, so no samples are dropped.

    Args:
        audio_file: Audio source passed straight to ``librosa.load``
            (the caller in this file passes a file path).
        chunk_size: Chunk length in seconds. May be fractional; it is
            converted to a whole number of samples by truncation.

    Returns:
        A tuple ``(audio_chunks, sr)`` where ``audio_chunks`` is a list of
        sample arrays in playback order (empty when the file has no samples)
        and ``sr`` is the sample rate reported by ``librosa.load``.

    Raises:
        ValueError: If ``chunk_size`` does not span at least one sample
            (zero, negative, or vanishingly small).
    """
    target_sr = 16000

    # Work in integer sample counts from the start: slice bounds must be
    # integers, so a fractional chunk_size would otherwise break slicing.
    samples_per_chunk = int(chunk_size * target_sr)

    # Validate before decoding so bad arguments fail fast, and so the stepped
    # range below can never be given a zero or negative step.
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_size must cover at least one sample at {target_sr} Hz, "
            f"got {chunk_size!r}"
        )

    # Load audio file
    audio, sr = librosa.load(audio_file, sr=target_sr)

    # Stepping by the chunk length yields every full chunk plus the shorter
    # trailing remainder (if any) without a separate special case.
    audio_chunks = [
        audio[start:start + samples_per_chunk]
        for start in range(0, len(audio), samples_per_chunk)
    ]
    return audio_chunks, sr
# Function to transcribe the audio in chunks using Whisper
def transcribe_audio_in_chunks(audio_file):
    """Transcribe an audio file piece by piece and return a labelled report.

    The file is split into 5-second chunks with ``chunk_audio``; each chunk
    is passed to the module-level Whisper ``model`` and its text is emitted
    on its own line, prefixed with a 1-based chunk number.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            nothing was supplied.

    Returns:
        The fixed message ``"No audio file provided."`` when ``audio_file``
        is ``None``; otherwise one newline-terminated ``"Chunk N: <text>"``
        line per chunk, concatenated into a single string.
    """
    if audio_file is None:
        return "No audio file provided."

    # The sample rate comes back alongside the chunks but is not needed here.
    segments, _sample_rate = chunk_audio(audio_file, chunk_size=5)

    report_lines = []
    for number, samples in enumerate(segments, start=1):
        # Hand Whisper a NumPy array built from the chunk's samples.
        waveform = np.array(samples)
        outcome = model.transcribe(waveform)
        report_lines.append(f"Chunk {number}: {outcome['text']}\n")

    return "".join(report_lines)
# Gradio interface for chunked transcription. The handler returns a single
# string after every chunk has been processed, so the text box is filled once
# per upload rather than updated incrementally.
iface = gr.Interface(
    fn=transcribe_audio_in_chunks,  # Handler: receives the uploaded file's path (or None)
    inputs=gr.Audio(type="filepath"),  # Audio input; the handler is given a file path
    outputs="text",  # Transcription shown as plain text
    title="Whisper Audio Transcription with Chunking",
    description="Upload an audio file, and Whisper will transcribe it in real-time as chunks."
)
# Launch the Gradio interface. No share link is requested here; pass
# share=True to launch() (e.g. on Colab) if a public URL is needed.
iface.launch()
|