|
import gradio as gr |
|
|
|
from datasets import load_dataset |
|
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline |
|
|
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Whisper checkpoint used for transcription.
model_id = "openai/whisper-large-v3-turbo"

# Select the compute device at runtime. The original hard-coded device=0,
# which crashes on machines without a CUDA GPU.
_use_cuda = torch.cuda.is_available()
_device = 0 if _use_cuda else -1
# fp16 halves memory on GPU; CPU inference stays in fp32.
_dtype = torch.float16 if _use_cuda else torch.float32

models = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=_dtype, low_cpu_mem_usage=True
)

# ASR pipeline; 30-second chunking enables long-form transcription.
model = pipeline(
    "automatic-speech-recognition",
    model=models,
    chunk_length_s=30,
    torch_dtype=_dtype,
    device=_device,
)

# NOTE(review): this dataset is loaded but never used anywhere in this file,
# and the UI description mentions a "bigcode" dataset instead — confirm intent
# before removing or wiring it in.
ds = load_dataset("CoIR-Retrieval/CodeSearchNet-php-queries-corpus")
|
|
|
def transcribe(audio_path):
    """Transcribe a recorded audio file to text with the Whisper pipeline.

    Args:
        audio_path: Filesystem path to the audio recording, or None when the
            Gradio microphone widget is submitted without a recording.

    Returns:
        The transcribed text, or an empty string when no audio was provided.
    """
    # Gradio passes None if the user submits without recording; the pipeline
    # would raise on it, so return early.
    if audio_path is None:
        return ""

    result = model(
        audio_path,
        # The original used batch_size=1000, which exhausts memory on long
        # recordings; 8 is a safe default for chunked long-form decoding.
        batch_size=8,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    return result["text"]
|
|
|
|
|
|
|
# Build the Gradio UI: microphone recording in, transcribed text out.
mic_input = gr.Audio(sources="microphone", type="filepath")

iface = gr.Interface(
    fn=transcribe,
    inputs=mic_input,
    outputs="text",
    title="Whisper Transcription for Developers",
    description="使用 Whisper 和 bigcode 数据集转录开发者相关术语。",
)

# Start the local Gradio server and serve the interface.
iface.launch()
|
|