|
import gradio as gr
import numpy as np
import torch
from datasets import load_dataset
from scipy.io import wavfile
from scipy.signal import resample
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
|
|
|
|
# Whisper checkpoint used for speech-to-text.
model_name = "openai/whisper-large-v3-turbo"

# The processor bundles the feature extractor (waveform -> log-mel spectrogram)
# and the tokenizer used to decode generated token ids back to text.
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
model.eval()  # inference only: disable dropout / training-mode behavior

# NOTE(review): the original code eagerly ran
#   dataset = load_dataset("bigcode/the-stack", data_dir="data/html")
# here, but `dataset` was never referenced anywhere else in this script.
# Downloading a very large corpus at startup for no use is a defect, so the
# call is removed; restore it only if it is actually wired into the pipeline.
|
|
|
def transcribe(audio):
    """Transcribe a recorded audio file to text with Whisper.

    Args:
        audio: Path to the audio file recorded by the Gradio widget
            (``gr.Audio(type="filepath")``), or ``None`` when nothing
            was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    # Gradio passes None when the user submits without recording.
    if audio is None:
        return ""

    # type="filepath" hands us a path string, not samples; Gradio's
    # microphone component saves a WAV file, so load it first.
    sample_rate, waveform = wavfile.read(audio)
    waveform = waveform.astype(np.float32)

    # Down-mix multi-channel audio to mono — Whisper expects one channel.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Normalize integer PCM amplitudes into [-1.0, 1.0].
    peak = np.max(np.abs(waveform))
    if peak > 0:
        waveform = waveform / peak

    # Whisper's feature extractor expects 16 kHz input.
    target_rate = 16000
    if sample_rate != target_rate:
        num_samples = int(round(len(waveform) * target_rate / sample_rate))
        waveform = resample(waveform, num_samples)

    # Whisper is an encoder-decoder model: it consumes log-mel
    # `input_features` and emits token ids via generate(). The original
    # code used the CTC-style input_values / argmax-of-logits pattern
    # (Wav2Vec2 style), which does not apply to Whisper.
    input_features = processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_features

    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
|
|
|
|
|
# Build the web UI: records audio from the microphone, passes the saved
# file path to transcribe(), and displays the returned text.
iface = gr.Interface(
    fn=transcribe,
    # Gradio 4.x removed the `source=` kwarg in favor of `sources=` (a list);
    # the old spelling raises a TypeError on current Gradio.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    # The original description claimed the bigcode dataset provided
    # "contextual support", but it was never used in the transcription path.
    description="Transcribe speech to text using the Whisper large-v3-turbo model.",
)

iface.launch()
|
|