"""Gradio app that transcribes audio with OpenAI's Whisper (large-v3-turbo)."""
import gradio as gr
import librosa
import torch
from datasets import load_dataset
from transformers import WhisperProcessor, WhisperForConditionalGeneration

model_name = "openai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Code-search corpus of developer queries; loaded here but not used by the
# transcription function below.
ds = load_dataset("CoIR-Retrieval/CodeSearchNet-php-queries-corpus")


def transcribe(audio_path):
    # Load the audio at 16 kHz, the sampling rate Whisper expects.
    audio, sr = librosa.load(audio_path, sr=16000)

    # Whisper consumes log-mel spectrogram features, not raw waveform values.
    input_features = processor(
        audio, sampling_rate=16000, return_tensors="pt"
    ).input_features

    # Whisper is an encoder-decoder model: decode with generate() rather than
    # taking an argmax over CTC-style logits.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
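

# A minimal, optional sketch (not wired into the Gradio app): Whisper can be
# nudged toward developer vocabulary by passing decoder prompt ids to
# generate(), following the Hugging Face prompting example. The prompt string
# here is an illustrative assumption; the decoded text may echo the prompt.
def transcribe_with_prompt(audio_path, prompt="PHP, API, repository, refactoring"):
    audio, sr = librosa.load(audio_path, sr=16000)
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
    prompt_ids = processor.get_prompt_ids(prompt, return_tensors="pt")
    with torch.no_grad():
        predicted_ids = model.generate(input_features, prompt_ids=prompt_ids)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]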


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    description="Transcribe developer-related terms using Whisper and the bigcode dataset.",
)

iface.launch()