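# app2.py — Whisper speech-to-text demo in Gradio: one tab transcribes
# uploaded audio files, the other transcribes microphone input.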
import os
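# Install openai-whisper at runtime (a common pattern on Hugging Face Spaces)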
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
from transformers import pipeline
import numpy as np
model = whisper.load_model("base")
# Note: "openai/whisper-base.ch" is not a published checkpoint; the multilingual
# "openai/whisper-base" model handles Chinese alongside other languages.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")
def transcribe(audio):
    sr, y = audio
    if y.ndim > 1:  # mix down multi-channel input to mono
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # avoid dividing by zero on silent input
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
def inference(audio):
    # Load the file, pad/trim to 30 seconds, and compute the log-Mel spectrogram
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # Detect the spoken language (probabilities unused here), then decode
    _, probs = model.detect_language(mel)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
with gr.Blocks() as demo:
    gr.Markdown("Transcribe speech to text using this demo.")
    with gr.Tab("Speech to Text"):
        file_audio = gr.Audio(
            type="filepath",
            label="Accepted formats: WAV, MP3, OGG, FLAC, AAC, M4A, WMA; mono or multi-channel.",
        )
        file_text = gr.Textbox(label="Transcript")
        gr.Button("Transcribe").click(fn=inference, inputs=file_audio, outputs=file_text)
    with gr.Tab("Real Time Speech Recognition"):
        with gr.Row():
            mic_audio = gr.Audio(sources=["microphone"], type="numpy")
            mic_text = gr.Textbox(label="Transcript")
        # Transcribe automatically when the user stops recording
        mic_audio.stop_recording(fn=transcribe, inputs=mic_audio, outputs=mic_text)

demo.launch()
# Two-tab version above; single-page version below.
# ##############################################################################
import os
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
model = whisper.load_model("base")
def inference(audio):
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
iface = gr.Interface(
    fn=inference,
    inputs=gr.Audio(
        type="filepath",
        label="Accepted formats: WAV, MP3, OGG, FLAC, AAC, M4A, WMA; mono or multi-channel.",
    ),
    outputs="text",
)
iface.launch()
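# When run locally, Gradio serves the app at http://127.0.0.1:7860 by default.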