Update app.py
app.py
CHANGED
@@ -1,11 +1,25 @@
+
 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
+from transformers import pipeline
+import numpy as np
+
+
+p = pipeline("automatic-speech-recognition", model="openai/whisper-base")  # assumed checkpoint: "openai/whisper-base.ch" is not a published model; this pipeline also goes unused below
 
 
 
 model = whisper.load_model("base")
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")  # same assumed checkpoint as above
+
+def transcribe(audio):
+    sr, y = audio  # Gradio passes microphone audio as a (sample_rate, numpy array) tuple
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))  # peak-normalize to [-1, 1]
+
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
 
 
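The dict passed to `transcriber` is the standard transformers ASR-pipeline input: raw float32 samples plus their sampling rate. A minimal sketch outside Gradio, using a synthetic clip as a stand-in for microphone audio (and guarding the peak division, which the committed code skips):

```python
import numpy as np
from transformers import pipeline

# Same task as the app; "openai/whisper-base" is assumed, since
# "openai/whisper-base.ch" is not a published checkpoint.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base")

# One second of 16 kHz audio; any 1-D float32 array works the same way.
sr = 16_000
y = np.random.uniform(-0.1, 0.1, sr).astype(np.float32)

peak = np.max(np.abs(y))
if peak > 0:       # a silent clip would otherwise divide by zero
    y = y / peak   # peak-normalize to [-1, 1]

# The pipeline accepts {"sampling_rate": ..., "raw": ...} and returns {"text": ...}.
print(asr({"sampling_rate": sr, "raw": y})["text"])
```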
@@ -21,12 +35,20 @@ def inference(audio):
     result = whisper.decode(model, mel, options)
 
     return result.text
-
-
-
-
-
-)
-
-
-
+
+with gr.Blocks() as demo:
+    gr.Markdown("Transcribe speech to text with this demo.")
+    with gr.Tab("Speech to Text"):
+        gr.Interface(
+            fn=inference,
+            inputs=gr.Audio(type="filepath", label="Accepted formats: WAV, MP3, OGG, FLAC, AAC, M4A, WMA; mono and multi-channel audio both work."),
+            outputs="text",
+        )
+    with gr.Tab("Real Time Speech Recognition"):
+        gr.Interface(
+            fn=transcribe,
+            inputs=gr.Audio(sources=["microphone"]),
+            outputs="text",
+        )
+
+demo.launch()
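Note that the two tabs deliberately run different engines: the original "Speech to Text" tab keeps the GitHub `whisper` package (`model = whisper.load_model("base")` plus `whisper.decode` inside `inference`), while the new tab goes through the `transformers` pipeline. The unused `p = pipeline(...)` near the top looks like a leftover first attempt at what became `transcriber`.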
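As committed, the "Real Time Speech Recognition" tab still waits for the recording to finish before transcribing. Live partial output needs Gradio's streaming mode; the sketch below follows the pattern from Gradio's real-time ASR guide, re-transcribing an accumulated buffer on every chunk (the buffer and function names are illustrative, not part of this commit):

```python
import gradio as gr
import numpy as np
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-base")

def stream_transcribe(buffer, chunk):
    # Each streamed chunk arrives as (sample_rate, numpy array); append it to
    # the running buffer and re-transcribe the whole utterance so far.
    sr, y = chunk
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak
    buffer = y if buffer is None else np.concatenate([buffer, y])
    return buffer, asr({"sampling_rate": sr, "raw": buffer})["text"]

demo = gr.Interface(
    fn=stream_transcribe,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)
demo.launch()
```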