Spaces:

wang0507
/

shuaige

Sleeping

App Files Files Community

shuaige / app2.py

wang0507

Update app2.py

2988a3d almost 2 years ago

raw

history blame contribute delete

2.22 kB

	import os
	import subprocess
	import sys

	# Whisper is installed from GitHub at startup, so this has to run before
	# `import whisper` below -- the statement order in this section is deliberate.
	# `sys.executable -m pip` installs into the interpreter that is running this
	# script (a bare `pip` on PATH may belong to a different environment), and
	# passing an argument list avoids going through a shell.
	# check=False keeps the step best-effort: if the install fails but the package
	# is already present, the import below still succeeds.
	subprocess.run(
	    [
	        sys.executable,
	        "-m",
	        "pip",
	        "install",
	        "git+https://github.com/openai/whisper.git",
	    ],
	    check=False,
	)
	import gradio as gr
	import whisper
	from transformers import pipeline
	import numpy as np


	# Hugging Face speech-recognition pipeline used by `transcribe`.
	# The previous id "openai/whisper-base.ch" is not one of the Whisper
	# checkpoints published under the `openai` namespace (only English-only `.en`
	# variants carry a suffix); the multilingual base checkpoint is used instead.
	transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base")

	# `p` used to be a second, identical pipeline, which loaded the same weights
	# twice. The name is kept as an alias in case anything else refers to it.
	p = transcriber

	# OpenAI Whisper model used by `inference`.
	model = whisper.load_model("base")

	def transcribe(audio):
	    """Transcribe a recording with the module-level `transcriber` pipeline.

	    Args:
	        audio: A ``(sampling_rate, samples)`` pair where ``samples`` is a NumPy
	            array, or ``None`` when nothing was recorded.

	    Returns:
	        The recognized text, or an empty string when there is no audio to
	        transcribe (``None`` or zero samples).
	    """
	    if audio is None:
	        return ""

	    sr, y = audio
	    if y.size == 0:
	        # np.max raises on an empty array; there is nothing to transcribe.
	        return ""

	    y = y.astype(np.float32)
	    if y.ndim > 1:
	        # Down-mix multi-channel input to mono.
	        # Assumes the layout is (samples, channels) -- TODO confirm against the
	        # Gradio Audio component in use.
	        y = y.mean(axis=1)

	    # Peak-normalize, but leave silent input untouched: dividing by a zero
	    # peak would turn every sample into NaN.
	    peak = np.max(np.abs(y))
	    if peak > 0:
	        y /= peak

	    return transcriber({"sampling_rate": sr, "raw": y})["text"]



	def inference(audio):
	    """Transcribe an audio file with the module-level Whisper `model`.

	    Args:
	        audio: Audio file path, passed straight to ``whisper.load_audio``.

	    Returns:
	        The decoded text. The waveform goes through ``whisper.pad_or_trim``
	        first, so only a single fixed-length window (30 seconds by default,
	        per the Whisper README) is decoded.
	    """
	    samples = whisper.pad_or_trim(whisper.load_audio(audio))
	    mel = whisper.log_mel_spectrogram(samples).to(model.device)

	    # No explicit model.detect_language() call here: its result was never
	    # used, and whisper.decode detects the language itself when the options
	    # do not set one, so the extra call only repeated the work.
	    # fp16 is disabled -- presumably because this runs on CPU; confirm.
	    options = whisper.DecodingOptions(fp16=False)
	    return whisper.decode(model, mel, options).text

	# Two tabs: file-based transcription (`inference`) and microphone
	# transcription (`transcribe`).
	with gr.Blocks() as demo:
	    # The previous text ("Flip text or image files ...") described a different
	    # demo; this app transcribes speech.
	    gr.Markdown("Transcribe speech to text using this demo.")
	    with gr.Tab("語音轉文字"):
	        # The arguments below used to sit in the tab body as bare statements,
	        # so `inference` was never connected to any component. Wrapping them
	        # in gr.Interface wires input -> function -> output.
	        gr.Interface(
	            fn=inference,
	            inputs=gr.Audio(type="filepath", label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA，單聲道、多聲道均可。"),
	            outputs="text",
	        )
	    with gr.Tab("Real Time Speech Recognition"):
	        with gr.Row():
	            # Same fix: `transcribe` was a dangling expression, not a handler.
	            gr.Interface(
	                transcribe,
	                gr.Audio(sources=["microphone"]),
	                "text",
	            )

	demo.launch()

	# ################################################################################################################################################
	# NOTE(review): from here on the file repeats the setup found at its top; it
	# looks like an earlier single-interface version kept below the separator --
	# confirm whether it is still needed.
	import os
	import subprocess
	import sys

	# Install Whisper before importing it. `sys.executable -m pip` targets the
	# interpreter running this script, and the argument list avoids a shell.
	# check=False keeps the install best-effort, as the original call was.
	subprocess.run(
	    [
	        sys.executable,
	        "-m",
	        "pip",
	        "install",
	        "git+https://github.com/openai/whisper.git",
	    ],
	    check=False,
	)
	import gradio as gr
	import whisper

	# Whisper model used by `inference`.
	model = whisper.load_model("base")



	def inference(audio):
	    """Decode one audio file to text using the module-level Whisper `model`.

	    Args:
	        audio: Audio file path, handed to ``whisper.load_audio``.

	    Returns:
	        The text of the decoding result. Input is passed through
	        ``whisper.pad_or_trim``, so a single fixed-length window (30 seconds
	        by default, per the Whisper README) is decoded.
	    """
	    waveform = whisper.load_audio(audio)
	    waveform = whisper.pad_or_trim(waveform)
	    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

	    # The separate model.detect_language() call was dropped: its output was
	    # discarded, and whisper.decode performs language detection on its own
	    # when DecodingOptions leaves the language unset.
	    # fp16 is disabled -- presumably for CPU execution; confirm.
	    result = whisper.decode(model, mel, whisper.DecodingOptions(fp16=False))
	    return result.text

	# Single-page UI: an audio input delivered to `inference` as a file path,
	# with the returned transcription shown as text.
	iface = gr.Interface(
	    inference,
	    gr.Audio(label="格式可為 WAV、MP3、OGG、FLAC、AAC、M4A、WMA，單聲道、多聲道均可。", type="filepath"),
	    "text",
	)

	# Start the web server for this interface.
	iface.launch()