import gradio as gr
import librosa
import torch
from datasets import load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Load the Whisper model and processor
model_name = "openai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
model.eval()

# Load the dataset (bigcode/the-stack); streaming avoids downloading the full
# corpus, and the dataset is gated, so accept its terms on the Hub first
dataset = load_dataset("bigcode/the-stack", data_dir="data/html", split="train", streaming=True)

def transcribe(audio_path):
    # Load the recording and resample to the 16 kHz rate Whisper expects
    audio, _ = librosa.load(audio_path, sr=16000)

    # Extract log-mel input features. Whisper is a sequence-to-sequence model,
    # not a CTC model, so we generate token ids with the decoder rather than
    # taking an argmax over encoder logits.
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features

    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    # Return the transcription
    return transcription[0]

# Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    description="Transcribe developer-related terms using Whisper and the bigcode dataset for contextual support.",
)

# Launch the Gradio app
iface.launch()
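
# ---------------------------------------------------------------------------
# Sketch: one way the loaded bigcode data could actually provide the
# "contextual support" mentioned in the description. This is a minimal,
# standalone illustration (not wired into the app above), assuming a
# transformers version where WhisperProcessor.get_prompt_ids and the
# prompt_ids argument of generate() are available. The regex term-extraction
# heuristic is a hypothetical example, not part of the original app.
# ---------------------------------------------------------------------------
import re
from collections import Counter
from itertools import islice

from datasets import load_dataset
from transformers import WhisperProcessor

sketch_processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3-turbo")

# Stream a handful of files from the-stack and count identifier-like tokens
stream = load_dataset("bigcode/the-stack", data_dir="data/html", split="train", streaming=True)
counts = Counter()
for sample in islice(stream, 50):
    counts.update(re.findall(r"[A-Za-z_][A-Za-z0-9_-]{3,}", sample["content"]))

# Turn the most frequent terms into a text prompt that nudges the decoder
# toward developer vocabulary during transcription
domain_terms = ", ".join(term for term, _ in counts.most_common(30))
prompt_ids = sketch_processor.get_prompt_ids(f"Glossary: {domain_terms}", return_tensors="pt")

# In transcribe() above, the prompt would be passed alongside the features:
#     predicted_ids = model.generate(input_features, prompt_ids=prompt_ids)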