import gradio as gr
import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Hub identifier of the fine-tuned Whisper checkpoint served by this app
model_name = "hackergeek98/tinyyyy_whisper"

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the processor and the model, placing the model on the chosen device
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name).to(device)

def _prepare_waveform(audio_data, sampling_rate, target_rate):
    """Return *audio_data* as mono float32 samples at *target_rate* Hz."""
    samples = np.asarray(audio_data)

    # Integer PCM is rescaled to roughly [-1, 1]; float input is only cast.
    kind = samples.dtype.kind
    if kind == "i":
        full_scale = -float(np.iinfo(samples.dtype).min)
        samples = samples.astype(np.float32) / full_scale
    elif kind == "u":
        half_range = (float(np.iinfo(samples.dtype).max) + 1.0) / 2.0
        samples = (samples.astype(np.float32) - half_range) / half_range
    else:
        samples = samples.astype(np.float32)

    # Multi-channel clips arrive as (samples, channels); average to mono so
    # the feature extractor does not mistake the channel axis for a batch.
    if samples.ndim == 2:
        samples = samples.mean(axis=1)

    # The feature extractor rejects any rate other than its own, so resample.
    # Linear interpolation keeps this dependency-free; a band-limited
    # resampler would give higher fidelity if one is available.
    if sampling_rate != target_rate:
        n_source = samples.shape[0]
        n_target = max(1, int(round(n_source * target_rate / sampling_rate)))
        source_times = np.arange(n_source) / sampling_rate
        target_times = np.arange(n_target) / target_rate
        samples = np.interp(target_times, source_times, samples).astype(np.float32)

    return samples


def transcribe_audio(audio):
    """Transcribe one uploaded audio clip with the fine-tuned Whisper model.

    Args:
        audio: ``(sampling_rate, samples)`` tuple as delivered by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when the user
            submitted without providing audio. ``samples`` may be integer
            PCM or float, shaped ``(n,)`` or ``(n, channels)``.

    Returns:
        The decoded transcription, or an empty string when there is no
        audio to transcribe.
    """
    # Nothing uploaded: return an empty transcription instead of crashing
    # on the tuple unpack below.
    if audio is None:
        return ""

    sampling_rate, audio_data = audio
    if np.asarray(audio_data).size == 0:
        return ""

    # Normalise, downmix and resample to the rate the model was trained on
    target_rate = processor.feature_extractor.sampling_rate
    waveform = _prepare_waveform(audio_data, sampling_rate, target_rate)

    # Preprocess the audio into log-mel input features
    inputs = processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_features.to(device)

    # Generate transcription token ids without tracking gradients
    with torch.no_grad():
        predicted_ids = model.generate(inputs)

    # Decode the single-item batch back to text
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

# Input widget: an uploaded audio file, handed to the callback as a
# (sampling_rate, samples) tuple.
# NOTE(review): Gradio 4+ renamed the `source` argument to `sources`;
# confirm the pinned Gradio version still accepts this keyword.
audio_input = gr.Audio(source="upload", type="numpy")

# Output widget: a text box showing the transcription
transcription_output = gr.Textbox(label="Transcription")

# Wire the callback and both widgets into a single-page interface
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=audio_input,
    outputs=transcription_output,
    title="Whisper ASR: Tinyyyy Model",
    description="Upload an audio file, and the fine-tuned Whisper model will transcribe it.",
    # Example clips, referenced by relative path
    examples=["example1.wav", "example2.wav"],
)

# Start the web server for the app
interface.launch()