wang0507 committed on
Commit
24bad25
·
1 Parent(s): 2988a3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -7
app.py CHANGED
@@ -3,10 +3,23 @@ import os
3
  os.system("pip install git+https://github.com/openai/whisper.git")
4
  import gradio as gr
5
  import whisper
 
 
 
 
 
6
 
7
 
8
 
9
  model = whisper.load_model("base")
 
 
 
 
 
 
 
 
10
 
11
 
12
 
@@ -22,11 +35,19 @@ def inference(audio):
22
  result = whisper.decode(model, mel, options)
23
 
24
  return result.text
25
-
26
- iface = gr.Interface(
27
- fn=inference,
28
- inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
29
- outputs="text"
30
- )
31
- iface.launch()
 
 
 
 
 
 
 
 
32
 
 
3
# Install openai-whisper at startup (Hugging Face Spaces pattern).
# NOTE(review): prefer pinning this in requirements.txt so the dependency
# is cached and reproducible instead of reinstalled on every boot.
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio as gr
import whisper
from transformers import pipeline
import numpy as np

# transformers ASR pipeline (downloads the checkpoint on first run).
# NOTE(review): "openai/whisper-base.ch" is not a published Hub checkpoint and
# fails at load time; "openai/whisper-base" is the real id — confirm whether a
# Chinese fine-tune was intended. Also, `p` duplicates `transcriber` defined
# later and appears unused — consider removing one of the two loads.
p = pipeline("automatic-speech-recognition", model="openai/whisper-base")
11
 
12
 
13
 
14
# Local openai-whisper model used by the file-upload ("語音轉文字") tab.
model = whisper.load_model("base")
# transformers ASR pipeline used by the microphone tab.
# NOTE(review): "openai/whisper-base.ch" does not exist on the Hugging Face Hub
# and raises at load time; "openai/whisper-base" does — confirm the intended
# checkpoint (a Chinese fine-tune may have been meant).
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")
16
+
17
def transcribe(audio):
    """Transcribe microphone audio with the `transcriber` pipeline.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        ``(sample_rate, waveform)`` as delivered by ``gr.Audio`` in its
        default numpy mode.

    Returns
    -------
    str
        The recognized text.
    """
    sr, y = audio
    y = y.astype(np.float32)
    # Peak-normalize to [-1, 1] as the pipeline expects float audio.
    # Guard the peak: on silent input np.max(np.abs(y)) is 0 and the
    # original unconditional division produced NaNs (RuntimeWarning),
    # which were then fed to the ASR model.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
23
 
24
 
25
 
 
35
  result = whisper.decode(model, mel, options)
36
 
37
  return result.text
38
+
39
# Build the two-tab UI. The original tab bodies were bare tuple statements
# (e.g. `fn=inference,` merely bound `fn = (inference,)`) and rendered no
# components at all. `gr.Interface` instances created inside a Blocks
# context render inline, wiring each function to its inputs/outputs.
with gr.Blocks() as demo:
    # The previous text ("Flip text or image files...") was copy-pasted from a
    # Gradio tutorial and did not describe this app.
    gr.Markdown("Speech-to-text demo: upload an audio file or use the microphone.")
    with gr.Tab("語音轉文字"):
        # File-upload tab backed by the local openai-whisper `inference`.
        gr.Interface(
            fn=inference,
            inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
            outputs="text",
        )
    with gr.Tab("Real Time Speech Recognition"):
        # Microphone tab backed by the transformers pipeline via `transcribe`.
        gr.Interface(
            fn=transcribe,
            inputs=gr.Audio(sources=["microphone"]),
            outputs="text",
        )

# Launch the Gradio app.
demo.launch()
53