# --- Residue from the hosting page; not part of the program ---
# Spaces: Sleeping
# File size: 1,507 Bytes
# Revision gutter (apparently per-line commit hashes): 5a9136d 994674b 5a9136d 994674b 5a9136d 994674b 5a9136d 994674b 5a9136d
# Line-number gutter: 1-41
import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import torch
import librosa
# Pick the compute device up front: CUDA when torch reports one, else CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the fine-tuned Whisper checkpoint together with its processor
model_name = "hackergeek98/tinyyyy_whisper"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Place the model on the selected device
model.to(device)
# Define the ASR function
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the fine-tuned Whisper model.

    Args:
        audio_file: Path to the audio file to transcribe. A falsy value
            (``None`` or an empty string, e.g. when no audio was supplied)
            short-circuits to an empty transcription.

    Returns:
        The decoded text of the first sequence in the generated batch, with
        special tokens stripped, or ``""`` when no file was given.
    """
    # Nothing to transcribe: return early instead of letting librosa.load
    # fail on a missing path.
    if not audio_file:
        return ""

    # Load the audio with librosa, resampling to 16 kHz on the way in.
    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)

    # Convert the waveform to model input features on the model's device.
    inputs = processor(
        audio_data, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features.to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        predicted_ids = model.generate(inputs)

    # Decode the generated ids; only the first batch entry is returned.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Create the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,  # Callback invoked with the submitted audio
    inputs=gr.Audio(type="filepath"),  # Audio input, configured with type="filepath"
    outputs=gr.Textbox(label="Transcription"),  # Text box showing the result
    title="Whisper ASR: Tinyyyy Model",
    description="Upload an audio file (e.g., .wav, .mp3, .ogg), and the fine-tuned Whisper model will transcribe it.",
)

# Launch the app (the stray trailing "|" after this call was a syntax error
# and has been removed)
interface.launch()