Spaces:

hackergeek98
/

tinyyy

Sleeping

tinyyy / app.py

Update app.py

994674b verified 8 months ago

1.51 kB

	import gradio as gr
	from transformers import WhisperProcessor, WhisperForConditionalGeneration
	import torch
	import librosa

	# Hub repository holding the fine-tuned Whisper checkpoint; the processor
	# and the model weights are both loaded from this same identifier.
	model_name = "hackergeek98/tinyyyy_whisper"

	# Pick the inference device: CUDA when torch reports it, otherwise CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	processor = WhisperProcessor.from_pretrained(model_name)
	model = WhisperForConditionalGeneration.from_pretrained(model_name)

	# Keep the model on the same device the input features are moved to later.
	model.to(device)

	# Define the ASR function
	def transcribe_audio(audio_file):
	    """Transcribe an audio file with the fine-tuned Whisper model.

	    Args:
	        audio_file: Path of the audio file to transcribe, as supplied by the
	            ``gr.Audio(type="filepath")`` input. ``None`` or an empty string
	            (nothing uploaded or recorded) is accepted.

	    Returns:
	        The decoded transcription of the first (and only) batch item, or an
	        empty string when no audio was provided.
	    """
	    # Without this guard a submit with no audio would reach librosa.load
	    # with None and fail instead of producing an (empty) transcription.
	    if not audio_file:
	        return ""

	    # Load audio file using librosa (supports multiple formats) and
	    # resample to 16 kHz; the same rate is then reported to the processor.
	    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)

	    # Preprocess the audio into model input features on the model's device.
	    inputs = processor(
	        audio_data, sampling_rate=sampling_rate, return_tensors="pt"
	    ).input_features.to(device)

	    # Generate transcription; gradients are not needed for inference.
	    with torch.no_grad():
	        predicted_ids = model.generate(inputs)

	    # Decode the token ids back to text, dropping special tokens.
	    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

	# Create the Gradio interface
	# Input component: the uploaded audio is handed to the callback as a path.
	audio_input = gr.Audio(type="filepath")
	# Output component: a text box that displays the returned transcription.
	transcription_output = gr.Textbox(label="Transcription")

	interface = gr.Interface(
	    fn=transcribe_audio,
	    inputs=audio_input,
	    outputs=transcription_output,
	    title="Whisper ASR: Tinyyyy Model",
	    description="Upload an audio file (e.g., .wav, .mp3, .ogg), and the fine-tuned Whisper model will transcribe it.",
	)

	# Start serving the interface.
	interface.launch()