"""Gradio demo for Chinese (Taiwan) automatic speech recognition.

Builds a small web UI in which a user uploads or records audio and gets the
transcription back. Transcription is delegated to the Hugging Face Inference
API through ``InferenceClient``; no model is loaded in this process.
"""

from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient

# Hub repository of the model used for transcription; also interpolated into
# the footer link at the bottom of the page.
model_id = "JacobLinCool/whisper-large-v3-turbo-common_voice_16_1-zh-TW-2"

# One client shared by every request handled by this process.
client = InferenceClient(model_id)


def transcribe_audio(audio: Optional[str]) -> str:
    """Transcribe an audio file to text via the Inference API.

    Args:
        audio: Path of the audio file to transcribe. The ``Audio`` component
            below is configured with ``type="filepath"``; an empty value
            (``None`` or ``""``) means nothing was uploaded or recorded.

    Returns:
        The ``text`` attribute of the recognition result.

    Raises:
        gr.Error: If ``audio`` is empty, so the user sees an actionable
            message instead of an opaque failure from the inference client.
    """
    # Guard first: forwarding an empty value to the client would fail with an
    # error that is meaningless to the person using the UI.
    if not audio:
        raise gr.Error("Please upload or record an audio clip before transcribing.")

    out = client.automatic_speech_recognition(audio)
    return out.text


with gr.Blocks() as demo:
    gr.Markdown("# TWASR: Chinese (Taiwan) Automatic Speech Recognition.")
    gr.Markdown("Upload an audio file or record your voice to transcribe it to text.")
    gr.Markdown(
        "First load may take a while to initialize the model, following requests will be faster."
    )

    with gr.Row():
        # type="filepath" makes the handler receive a path string rather than
        # raw audio samples.
        audio_input = gr.Audio(
            label="Audio", type="filepath", show_download_button=True
        )
        text_output = gr.Textbox(label="Transcription")

    transcribe_button = gr.Button("Transcribe with Inference API")
    transcribe_button.click(
        fn=transcribe_audio, inputs=[audio_input], outputs=[text_output]
    )

    # Bundled sample clips, wired to the same handler as the button.
    gr.Examples(
        [
            ["./examples/audio1.mp3"],
            ["./examples/audio2.mp3"],
        ],
        inputs=[audio_input],
        outputs=[text_output],
        fn=transcribe_audio,
        cache_examples=True,
        cache_mode="lazy",
        run_on_click=True,
    )

    gr.Markdown(
        f"Current model: {model_id}. For more information, visit the [model hub](https://huggingface.co/{model_id})."
    )

if __name__ == "__main__":
    demo.launch()