wang0507 committed on
Commit
b66022d
·
1 Parent(s): b685b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -1,11 +1,25 @@
 
1
  import os
2
  os.system("pip install git+https://github.com/openai/whisper.git")
3
  import gradio as gr
4
  import whisper
 
 
 
 
 
5
 
6
 
7
 
8
  model = whisper.load_model("base")
 
 
 
 
 
 
 
 
9
 
10
 
11
 
@@ -21,12 +35,18 @@ def inference(audio):
21
  result = whisper.decode(model, mel, options)
22
 
23
  return result.text
24
-
25
- iface = gr.Interface(
26
- fn=inference,
27
- inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
28
- outputs="text"
29
- )
30
-
31
- # 启动 Gradio 界面
32
- iface.launch()
 
 
 
 
 
 
 
1
+
2
  import os
3
  os.system("pip install git+https://github.com/openai/whisper.git")
4
  import gradio as gr
5
  import whisper
6
+ from transformers import pipeline
7
+ import numpy as np
8
+
9
+
10
+ p = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
11
 
12
 
13
 
14
  model = whisper.load_model("base")
15
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
16
+
17
+ def transcribe(audio):
18
+ sr, y = audio
19
+ y = y.astype(np.float32)
20
+ y /= np.max(np.abs(y))
21
+
22
+ return transcriber({"sampling_rate": sr, "raw": y})["text"]
23
 
24
 
25
 
 
35
  result = whisper.decode(model, mel, options)
36
 
37
  return result.text
38
+
39
+ with gr.Blocks() as demo:
40
+ gr.Markdown("Flip text or image files using this demo.")
41
+ with gr.Tab("語音轉文字"):
42
+ fn=inference,
43
+ inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
44
+ outputs="text"
45
+ with gr.Tab("Real Time Speech Recognition"):
46
+ with gr.Row():
47
+ transcribe,
48
+ gr.Audio(sources=["microphone"]),
49
+ "text",
50
+
51
+
52
+ demo.launch()