MeBai committed on
Commit
bc437c2
·
verified ·
1 Parent(s): 86f9703

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -12
app.py CHANGED
@@ -1,13 +1,17 @@
1
  import gradio as gr
2
- from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  from datasets import load_dataset
 
 
4
  import torch
5
  # import librosa
6
 
7
  # 加载 Whisper 模型和 processor
8
- model_name = "openai/whisper-large-v3-turbo"
9
- processor = WhisperProcessor.from_pretrained(model_name)
10
- model = WhisperForConditionalGeneration.from_pretrained(model_name)
 
 
11
 
12
  # 加载数据集 bigcode/the-stack
13
 
@@ -16,17 +20,20 @@ ds = load_dataset("CoIR-Retrieval/CodeSearchNet-php-queries-corpus")
16
  def transcribe(audio_path):
17
  # 加载音频文件并转换为信号
18
  # audio, sr = librosa.load(audio_path, sr=16000)
19
- input_values = processor(audio_path, return_tensors="pt", sampling_rate=16000).input_values
20
 
21
- # 模型推理
22
- with torch.no_grad():
23
- logits = model(input_values).logits
24
 
25
- predicted_ids = torch.argmax(logits, dim=-1)
26
- transcription = processor.batch_decode(predicted_ids)
27
-
 
 
 
28
  # 返回转录结果
29
- return transcription[0]
30
 
31
 
32
  # Gradio 界面
 
1
  import gradio as gr
2
+ # from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  from datasets import load_dataset
4
+ from transformers import pipeline
5
+
6
  import torch
7
  # import librosa
8
 
9
  # 加载 Whisper 模型和 processor
10
+ # model_name = "openai/whisper-large-v3-turbo"
11
+ # processor = WhisperProcessor.from_pretrained(model_name)
12
+ # model = WhisperForConditionalGeneration.from_pretrained(model_name)
13
+
14
+ model = pipeline("automatic-speech-recognition", model="ylacombe/whisper-large-v3-turbo", chunk_length_s=30, device=0)
15
 
16
  # 加载数据集 bigcode/the-stack
17
 
 
20
  def transcribe(audio_path):
21
  # 加载音频文件并转换为信号
22
  # audio, sr = librosa.load(audio_path, sr=16000)
23
+ # input_values = processor(audio_path, return_tensors="pt", sampling_rate=16000).["text"]
24
 
25
+ # # 模型推理
26
+ # with torch.no_grad():
27
+ # logits = model(input_values).logits
28
 
29
+ # predicted_ids = torch.argmax(logits, dim=-1)
30
+ # transcription = processor.batch_decode(predicted_ids)
31
+
32
+ transcription = model(audio_path,batch_size=1000, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
33
+
34
+ # result = pipe(sample)
35
  # 返回转录结果
36
+ return transcription
37
 
38
 
39
  # Gradio 界面