wang0507 committed on
Commit
b66022d
·
1 Parent(s): b685b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -9
app.py CHANGED
@@ -1,11 +1,25 @@
 
1
  import os
2
  os.system("pip install git+https://github.com/openai/whisper.git")
3
  import gradio as gr
4
  import whisper
 
 
 
 
 
5
 
6
 
7
 
8
  model = whisper.load_model("base")
 
 
 
 
 
 
 
 
9
 
10
 
11
 
@@ -21,12 +35,18 @@ def inference(audio):
21
  result = whisper.decode(model, mel, options)
22
 
23
  return result.text
24
-
25
- iface = gr.Interface(
26
- fn=inference,
27
- inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
28
- outputs="text"
29
- )
30
-
31
- # 启动 Gradio 界面
32
- iface.launch()
 
 
 
 
 
 
 
1
+
2
  import os
3
  os.system("pip install git+https://github.com/openai/whisper.git")
4
  import gradio as gr
5
  import whisper
6
+ from transformers import pipeline
7
+ import numpy as np
8
+
9
+
10
+ p = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
11
 
12
 
13
 
14
  model = whisper.load_model("base")
15
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.ch")
16
+
17
+ def transcribe(audio):
18
+ sr, y = audio
19
+ y = y.astype(np.float32)
20
+ y /= np.max(np.abs(y))
21
+
22
+ return transcriber({"sampling_rate": sr, "raw": y})["text"]
23
 
24
 
25
 
 
35
  result = whisper.decode(model, mel, options)
36
 
37
  return result.text
38
+
39
+ with gr.Blocks() as demo:
40
+ gr.Markdown("Flip text or image files using this demo.")
41
+ with gr.Tab("語音轉文字"):
42
+ fn=inference,
43
+ inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。"),
44
+ outputs="text"
45
+ with gr.Tab("Real Time Speech Recognition"):
46
+ with gr.Row():
47
+ transcribe,
48
+ gr.Audio(sources=["microphone"]),
49
+ "text",
50
+
51
+
52
+ demo.launch()