import gradio as gr
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
# import librosa
# Load the Whisper model and processor
# model_name = "openai/whisper-large-v3-turbo"
# processor = WhisperProcessor.from_pretrained(model_name)
# model = WhisperForConditionalGeneration.from_pretrained(model_name)
model_id = "openai/whisper-large-v3-turbo"
whisper_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, low_cpu_mem_usage=True)
processor = AutoProcessor.from_pretrained(model_id)
# When the pipeline is given a model instance, the tokenizer and feature extractor must be passed explicitly.
pipe = pipeline(
    "automatic-speech-recognition", model=whisper_model,
    tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor,
    chunk_length_s=30, device=0 if torch.cuda.is_available() else -1,
)
# Load a code-search dataset (currently unused by the transcription flow)
ds = load_dataset("CoIR-Retrieval/CodeSearchNet-php-queries-corpus")
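# Optional sanity check on the corpus (a sketch; the exact split and column names depend on
# the dataset card, so this is only illustrative):
# print(ds)  # shows the available splits and their sizes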
def transcribe(audio_path):
    # Load the audio file and convert it to a signal
    # audio, sr = librosa.load(audio_path, sr=16000)
    # input_values = processor(audio_path, return_tensors="pt", sampling_rate=16000).["text"]
    # # Model inference
    # with torch.no_grad():
    #     logits = model(input_values).logits
    #     predicted_ids = torch.argmax(logits, dim=-1)
    # transcription = processor.batch_decode(predicted_ids)
    # Run the ASR pipeline on the recorded file and keep only the text field
    transcription = pipe(audio_path, batch_size=1000, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
    # result = pipe(sample)
    # Return the transcription result
    return transcription
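# Quick local check (a sketch, not part of the app): the ASR pipeline accepts a plain file
# path, so transcribe() can be exercised without launching the Gradio UI. "sample.wav" is a
# hypothetical local audio file used only for illustration.
# print(transcribe("sample.wav"))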
# Gradio interface
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    description="Transcribe developer-related terminology using Whisper and the bigcode dataset.",
)
# Launch the Gradio app
iface.launch()