import os
import subprocess
import sys

# Install the reference Whisper package before it is imported below.
# The argument-list form avoids going through a shell, and
# `sys.executable -m pip` installs into the interpreter that is running this
# script rather than whichever `pip` happens to be first on PATH.
# check=False keeps the install best-effort: if it fails, the problem shows up
# as an ImportError on `import whisper` instead of aborting here.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)

import gradio as gr  # noqa: E402  (must follow the install step)
import numpy as np  # noqa: E402
import whisper  # noqa: E402
from transformers import pipeline  # noqa: E402

# Reference Whisper checkpoint, used by `inference` for audio files.
model = whisper.load_model("base")

# Hugging Face ASR pipeline, used by `transcribe` for microphone input.
# The published Whisper checkpoints are the multilingual ones and their
# English-only ".en" variants; there is no "openai/whisper-base.ch", so the
# multilingual "openai/whisper-base" is loaded instead.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
)

# `p` used to be a second, identical pipeline. Keep the name as an alias so
# the same weights are not loaded into memory twice.
p = transcriber
def transcribe(audio):
    """Transcribe one recording with the Hugging Face ASR pipeline.

    Args:
        audio: A ``(sample_rate, samples)`` tuple where ``samples`` is a
            NumPy array, or ``None`` when nothing was recorded. A 2-D
            ``samples`` array is assumed to be laid out as
            ``(n_samples, n_channels)``, which is how Gradio's numpy audio
            format delivers multi-channel input.

    Returns:
        The recognised text, or an empty string when there is no audio.
    """
    if audio is None:
        return ""

    sr, y = audio
    if y.size == 0:
        # np.max() raises on an empty array; there is nothing to transcribe.
        return ""

    y = y.astype(np.float32)
    if y.ndim > 1:
        # The pipeline expects a mono waveform: average the channels.
        y = y.mean(axis=1)

    # Scale to [-1, 1]. Skip the division for all-zero (silent) input, which
    # would otherwise turn every sample into NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
def inference(audio):
    """Transcribe an audio file with the reference Whisper model.

    Args:
        audio: Path to an audio file, or ``None``/empty when no file was
            provided.

    Returns:
        The decoded text, or an empty string when no file was provided.

    Note:
        ``whisper.pad_or_trim`` fits the waveform to Whisper's fixed input
        window (30 seconds by default), so only the start of a longer
        recording is transcribed.
    """
    if not audio:
        return ""

    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # No separate model.detect_language() call: its result was never used,
    # and whisper.decode() detects the language itself when
    # DecodingOptions.language is left unset.
    # fp16=False decodes in float32 (half precision is not usable on CPU).
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text
# Two-tab UI: file transcription via `inference`, microphone transcription
# via `transcribe`. Each tab wires its own audio input to a text output
# through a button click; bare `fn=...,` / `inputs=...,` statements inside a
# `with gr.Tab(...)` body only assign tuples and connect nothing.
with gr.Blocks() as demo:
    gr.Markdown(
        "Transcribe speech to text from an audio file or from the microphone."
    )

    with gr.Tab("語音轉文字"):
        file_audio = gr.Audio(
            type="filepath",
            label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。",
        )
        file_text = gr.Textbox()
        file_button = gr.Button()
        file_button.click(fn=inference, inputs=file_audio, outputs=file_text)

    with gr.Tab("Real Time Speech Recognition"):
        with gr.Row():
            # Default numpy format: `transcribe` receives (sample_rate, samples).
            mic_audio = gr.Audio(sources=["microphone"])
            mic_text = gr.Textbox()
        mic_button = gr.Button()
        mic_button.click(fn=transcribe, inputs=mic_audio, outputs=mic_text)

demo.launch()
# NOTE(review): this repeats the install / import / model-load steps found
# at the top of the file.
import os
import subprocess
import sys

# Install the reference Whisper package before it is imported below.
# The argument-list form avoids going through a shell, and
# `sys.executable -m pip` installs into the interpreter that is running this
# script rather than whichever `pip` happens to be first on PATH.
# check=False keeps the install best-effort: if it fails, the problem shows up
# as an ImportError on `import whisper` instead of aborting here.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)

import gradio as gr  # noqa: E402  (must follow the install step)
import whisper  # noqa: E402

# Reference Whisper checkpoint used by `inference`.
model = whisper.load_model("base")
# NOTE(review): redefines the `inference` function declared earlier in this
# file.
def inference(audio):
    """Transcribe an audio file with the reference Whisper model.

    Args:
        audio: Path to an audio file, or ``None``/empty when no file was
            provided.

    Returns:
        The decoded text, or an empty string when no file was provided.

    Note:
        ``whisper.pad_or_trim`` fits the waveform to Whisper's fixed input
        window (30 seconds by default), so only the start of a longer
        recording is transcribed.
    """
    if not audio:
        return ""

    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # No separate model.detect_language() call: its result was never used,
    # and whisper.decode() detects the language itself when
    # DecodingOptions.language is left unset.
    # fp16=False decodes in float32 (half precision is not usable on CPU).
    options = whisper.DecodingOptions(fp16=False)
    return whisper.decode(model, mel, options).text
# Single-page UI: one audio input (handed to `inference` as a file path) and
# one plain-text output.
_audio_input = gr.Audio(
    type="filepath",
    label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA,單聲道、多聲道均可。",
)

iface = gr.Interface(fn=inference, inputs=_audio_input, outputs="text")

iface.launch()