Spaces:

wang0507
/

shuaige

Sleeping

wang0507 committed on Nov 16, 2023

Commit

4dfea18

1 Parent(s): 3cc9351

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,21 +1,14 @@
 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
-import numpy as np
 model = whisper.load_model("base")
-def transcribe(stream, new_chunk):
-    sr, y = new_chunk
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-    if stream is not None:
-        stream = np.concatenate([stream, y])
-    else:
-        stream = y
-    return stream, whisper.transcribe(stream, model=model, sampling_rate=sr)["text"]
 def inference(audio):
     audio = whisper.load_audio(audio)
@@ -25,26 +18,17 @@ def inference(audio):
     _, probs = model.detect_language(mel)
-    options = whisper.DecodingOptions(fp16=False)
     result = whisper.decode(model, mel, options)
     return result.text
-with gr.Blocks() as demo:
-    gr.Markdown("Flip text or image files using this demo.")
-    with gr.Tab("語音轉文字"):
-        with gr.Row():
-            inputs = gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA，單聲道、多聲道均可。")
-            outputs = "text"
-            fn = inference
-            iface = gr.Interface(fn=fn, inputs=inputs, outputs=outputs)
-            ["state", iface]
-    with gr.Tab("Real Time Speech Recognition"):
-        with gr.Row():
-            transcribe
-            ["state", gr.Audio(sources=["microphone"], streaming=True)],
-            ["state", "text"],
-            live=True
-demo.launch()

 import os
 os.system("pip install git+https://github.com/openai/whisper.git")
 import gradio as gr
 import whisper
 model = whisper.load_model("base")
 def inference(audio):
     audio = whisper.load_audio(audio)
     _, probs = model.detect_language(mel)
+    options = whisper.DecodingOptions(fp16 = False)
     result = whisper.decode(model, mel, options)
     return result.text
+iface = gr.Interface(
+    fn=inference,
+    inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA，單聲道、多聲道均可。"),
+    outputs="text"
+)
+# 启动 Gradio 界面
+iface.launch()