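Update app.py: remove the transformers pipeline, the transcribe helper and the gr.Blocks tabs; serve inference through a single gr.Interface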
Browse files
app.py
CHANGED
|
@@ -3,23 +3,10 @@ import os
|
|
| 3 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
| 4 |
import gradio as gr
|
| 5 |
import whisper
|
| 6 |
-
from transformers import pipeline
|
| 7 |
-
import numpy as np
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
p = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
|
| 11 |
|
| 12 |
|
| 13 |
|
| 14 |
model = whisper.load_model("base")
|
| 15 |
-
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
|
| 16 |
-
|
| 17 |
-
def transcribe(audio):
|
| 18 |
-
sr, y = audio
|
| 19 |
-
y = y.astype(np.float32)
|
| 20 |
-
y /= np.max(np.abs(y))
|
| 21 |
-
|
| 22 |
-
return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
| 23 |
|
| 24 |
|
| 25 |
|
|
@@ -35,19 +22,12 @@ def inference(audio):
|
|
| 35 |
result = whisper.decode(model, mel, options)
|
| 36 |
|
| 37 |
return result.text
|
| 38 |
-
|
| 39 |
-
with gr.Blocks() as demo:
|
| 40 |
-
gr.Markdown("Flip text or image files using this demo.")
|
| 41 |
-
with gr.Tab("語音轉文字"):
|
| 42 |
-
fn=inference,
|
| 43 |
-
inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
|
| 44 |
-
outputs="text"
|
| 45 |
-
with gr.Tab("Real Time Speech Recognition"):
|
| 46 |
-
transcribe,
|
| 47 |
-
gr.Audio(sources=["microphone"]),
|
| 48 |
-
"text",
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
# 启动 Gradio 界面
|
| 52 |
-
|
| 53 |
-
|
|
|
|
| 3 |
os.system("pip install git+https://github.com/openai/whisper.git")
|
| 4 |
import gradio as gr
|
| 5 |
import whisper
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
|
| 9 |
model = whisper.load_model("base")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
|
|
|
|
| 22 |
result = whisper.decode(model, mel, options)
|
| 23 |
|
| 24 |
return result.text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
iface = gr.Interface(
|
| 27 |
+
fn=inference,
|
| 28 |
+
inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
|
| 29 |
+
outputs="text"
|
| 30 |
+
)
|
| 31 |
|
| 32 |
# 启动 Gradio 界面
|
| 33 |
+
iface.launch()
|
|
|