MeBai committed on
Commit
72d6ac4
·
verified ·
1 Parent(s): d8c6fbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -3,33 +3,33 @@ from transformers import WhisperProcessor, WhisperForConditionalGeneration
3
  from datasets import load_dataset
4
  import torch
5
 
6
# Load the Whisper checkpoint and its matching processor once, at import time.
model_name = "openai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Open the HTML subset of bigcode/the-stack in streaming mode: without it,
# load_dataset downloads and prepares the whole subset before the app can start,
# even though `dataset` is not consumed anywhere in the visible file.
dataset = load_dataset("bigcode/the-stack", data_dir="data/html", streaming=True)
13
 
def transcribe(audio):
    """Transcribe a recorded audio file to text with Whisper.

    Args:
        audio: Path to the audio file handed over by the Gradio ``Audio``
            component (it is configured with ``type="filepath"``), or a falsy
            value (``None`` / ``""``) when nothing was recorded.

    Returns:
        The decoded transcription, or an empty string when no audio was given.
    """
    # Nothing recorded: return early instead of failing inside the processor.
    if not audio:
        return ""

    # Function-scope import: transformers is already a dependency of this file.
    # NOTE(review): ffmpeg_read shells out to the `ffmpeg` binary — confirm it
    # is available in the deployment image.
    from transformers.pipelines.audio_utils import ffmpeg_read

    # The processor needs a raw waveform at the model's sampling rate,
    # not a file path, so decode the file first.
    sampling_rate = processor.feature_extractor.sampling_rate
    with open(audio, "rb") as audio_file:
        waveform = ffmpeg_read(audio_file.read(), sampling_rate)

    # Whisper's processor exposes `input_features` (there is no `input_values`).
    input_features = processor(
        waveform, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features

    # Whisper is an encoder-decoder model: a bare forward pass needs decoder
    # inputs, so decode autoregressively with generate() instead of taking an
    # argmax over `.logits`.
    # NOTE(review): one generate() call presumably covers a single 30-second
    # window; longer recordings may need chunking — confirm.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # Drop special tokens so only the spoken text is returned.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
24
 
25
# Build the web UI: microphone recording in, transcribed text out.
# NOTE(review): newer Gradio releases appear to take `sources=[...]` instead of
# `source=` on gr.Audio — confirm against the pinned Gradio version.
_microphone_input = gr.Audio(source="microphone", type="filepath")
iface = gr.Interface(
    title="Whisper Transcription for Developers",
    description="Transcribe developer-related terms using Whisper and bigcode dataset for contextual support.",
    fn=transcribe,
    inputs=_microphone_input,
    outputs="text",
)

# Start serving the app.
iface.launch()
 
3
  from datasets import load_dataset
4
  import torch
5
 
6
# Load the Whisper checkpoint and its matching processor once, at import time.
model_name = "openai/whisper-large-v3-turbo"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Open the train split of the bigcode/the-stack HTML subset in streaming mode:
# without it, load_dataset downloads and prepares the whole subset before the
# app can start, even though `dataset` is not consumed in the visible file.
dataset = load_dataset(
    "bigcode/the-stack", data_dir="data/html", split="train", streaming=True
)
13
 
def transcribe(audio):
    """Transcribe a recorded audio file to text with Whisper.

    Args:
        audio: Path to the audio file handed over by the Gradio ``Audio``
            component (it is configured with ``type="filepath"``), or a falsy
            value (``None`` / ``""``) when nothing was recorded.

    Returns:
        The decoded transcription, or an empty string when no audio was given.
    """
    # Nothing recorded: return early instead of failing inside the processor.
    if not audio:
        return ""

    # Function-scope import: transformers is already a dependency of this file.
    # NOTE(review): ffmpeg_read shells out to the `ffmpeg` binary — confirm it
    # is available in the deployment image.
    from transformers.pipelines.audio_utils import ffmpeg_read

    # The processor needs a raw waveform at the model's sampling rate,
    # not a file path, so decode the file first.
    sampling_rate = processor.feature_extractor.sampling_rate
    with open(audio, "rb") as audio_file:
        waveform = ffmpeg_read(audio_file.read(), sampling_rate)

    # Whisper's processor exposes `input_features` (there is no `input_values`).
    input_features = processor(
        waveform, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features

    # Whisper is an encoder-decoder model: a bare forward pass needs decoder
    # inputs, so decode autoregressively with generate() instead of taking an
    # argmax over `.logits`.
    # NOTE(review): one generate() call presumably covers a single 30-second
    # window; longer recordings may need chunking — confirm.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # Drop special tokens so only the spoken text is returned.
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
24
 
25
# Build the web UI: microphone recording in, transcribed text out.
# NOTE(review): newer Gradio releases appear to take `sources=[...]` instead of
# `source=` on gr.Audio — confirm against the pinned Gradio version.
_microphone_input = gr.Audio(source="microphone", type="filepath")
iface = gr.Interface(
    title="Whisper Transcription for Developers",
    description="使用 Whisper bigcode 数据集转录开发者相关术语。",
    fn=transcribe,
    inputs=_microphone_input,
    outputs="text",
)

# Start serving the app.
iface.launch()