Update app.py
Browse files
app.py
CHANGED
@@ -1,21 +1,14 @@
|
|
|
|
1 |
import os
|
2 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
3 |
import gradio as gr
|
4 |
import whisper
|
5 |
-
|
|
|
6 |
|
7 |
model = whisper.load_model("base")
|
8 |
|
9 |
-
def transcribe(stream, new_chunk):
|
10 |
-
sr, y = new_chunk
|
11 |
-
y = y.astype(np.float32)
|
12 |
-
y /= np.max(np.abs(y))
|
13 |
|
14 |
-
if stream is not None:
|
15 |
-
stream = np.concatenate([stream, y])
|
16 |
-
else:
|
17 |
-
stream = y
|
18 |
-
return stream, whisper.transcribe(stream, model=model, sampling_rate=sr)["text"]
|
19 |
|
20 |
def inference(audio):
|
21 |
audio = whisper.load_audio(audio)
|
@@ -25,26 +18,17 @@ def inference(audio):
|
|
25 |
|
26 |
_, probs = model.detect_language(mel)
|
27 |
|
28 |
-
options = whisper.DecodingOptions(fp16=False)
|
29 |
result = whisper.decode(model, mel, options)
|
30 |
|
31 |
return result.text
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
["state", iface]
|
42 |
-
with gr.Tab("Real Time Speech Recognition"):
|
43 |
-
with gr.Row():
|
44 |
-
transcribe
|
45 |
-
["state", gr.Audio(sources=["microphone"], streaming=True)],
|
46 |
-
["state", "text"],
|
47 |
-
live=True
|
48 |
-
|
49 |
-
demo.launch()
|
50 |
|
|
|
1 |
+
|
2 |
import os
|
3 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
4 |
import gradio as gr
|
5 |
import whisper
|
6 |
+
|
7 |
+
|
8 |
|
9 |
model = whisper.load_model("base")
|
10 |
|
|
|
|
|
|
|
|
|
11 |
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
def inference(audio):
|
14 |
audio = whisper.load_audio(audio)
|
|
|
18 |
|
19 |
_, probs = model.detect_language(mel)
|
20 |
|
21 |
+
options = whisper.DecodingOptions(fp16 = False)
|
22 |
result = whisper.decode(model, mel, options)
|
23 |
|
24 |
return result.text
|
25 |
|
26 |
+
iface = gr.Interface(
|
27 |
+
fn=inference,
|
28 |
+
inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
|
29 |
+
outputs="text"
|
30 |
+
)
|
31 |
+
|
32 |
+
# 启动 Gradio 界面
|
33 |
+
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|