File size: 1,742 Bytes
b47054b
bc437c2
1a82ef7
cc5ccf1
bc437c2
1a82ef7
86f9703
b47054b
72d6ac4
bc437c2
 
 
 
376dc4c
 
 
cc5ccf1
376dc4c
 
 
1a82ef7
72d6ac4
6f65c39
a2cdd68
1a82ef7
a2cdd68
 
86f9703
bc437c2
a2cdd68
bc437c2
 
 
a2cdd68
bc437c2
 
 
 
 
 
72d6ac4
bc437c2
1a82ef7
a2cdd68
72d6ac4
1a82ef7
 
86f9703
1a82ef7
 
72d6ac4
1a82ef7
 
72d6ac4
1a82ef7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
# from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

import torch
# import librosa

# Load the Whisper model and processor (earlier approach, kept commented out below)
# model_name = "openai/whisper-large-v3-turbo"
# processor = WhisperProcessor.from_pretrained(model_name)
# model = WhisperForConditionalGeneration.from_pretrained(model_name)

model_id = "openai/whisper-large-v3-turbo"

# Use the first CUDA GPU when one is available and fall back to the CPU
# otherwise, so start-up does not depend on a GPU being present.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

models = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
)

# The pipeline below receives an instantiated model rather than a checkpoint
# name, so the tokenizer and feature extractor are loaded from the same
# checkpoint and passed explicitly instead of relying on pipeline() to infer
# them.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline; audio is processed in 30-second chunks.
model = pipeline(
    "automatic-speech-recognition",
    model=models,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    chunk_length_s=30,
    device=device,
)

# Load the CoIR-Retrieval/CodeSearchNet-php-queries-corpus dataset.
# NOTE(review): `ds` is not referenced by any other code visible in this
# file -- confirm whether this start-up download is still needed.
ds = load_dataset("CoIR-Retrieval/CodeSearchNet-php-queries-corpus")

def transcribe(audio_path):
    """Transcribe an audio recording to text with the module-level pipeline.

    Args:
        audio_path: Path of the audio file to transcribe. May be ``None`` or
            an empty string when no audio was supplied.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        no audio was supplied.
    """
    # No recording supplied: return early instead of handing an empty input
    # to the speech-recognition pipeline.
    if audio_path is None or (isinstance(audio_path, str) and not audio_path):
        return ""

    result = model(
        audio_path,
        batch_size=1000,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )

    # Timestamps are requested above, but only the plain text is returned.
    return result["text"]

    
# Gradio UI: a microphone audio input whose recording is passed to
# `transcribe` as a file path, with the returned value shown as text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
    title="Whisper Transcription for Developers",
    description="使用 Whisper 和 bigcode 数据集转录开发者相关术语。",
)

# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server as a side effect.
if __name__ == "__main__":
    iface.launch()