"""Gradio demo that predicts the spoken language of an audio clip.

On import, this module downloads the VAD and language-ID TFLite model files
from the Hugging Face Hub and builds a ``WavToLangRunner`` from them. Running
the module as a script launches a Gradio interface around ``predict``.
"""

import os

import gradio as gr
from huggingface_hub import hf_hub_download
from sidlingvo import wav_to_lang

# NOTE(review): the title and description mention speaker recognition, but the
# code below loads a language-ID model and reports a predicted language.
# Confirm the intended wording and paper link before changing these strings.
title = "Speaker Recognition Demo"

description = """
A demo of conformer-based speaker recognition.
Paper: https://arxiv.org/abs/2104.02125
"""

repo_id = "tflite-hub/conformer-lang-id"
model_path = "models"

# Model artifacts fetched from the Hub; named once so the download step and
# the runner construction cannot drift apart.
_VAD_MODEL_FILE = "vad_short_model.tflite"
_VAD_MEAN_STDDEV_FILE = "vad_short_mean_stddev.csv"
_LANGID_MODEL_FILE = "conformer_langid_medium.tflite"

for _filename in (_VAD_MODEL_FILE, _VAD_MEAN_STDDEV_FILE, _LANGID_MODEL_FILE):
    hf_hub_download(repo_id=repo_id, filename=_filename, local_dir=model_path)

runner = wav_to_lang.WavToLangRunner(
    vad_model_file=os.path.join(model_path, _VAD_MODEL_FILE),
    vad_mean_stddev_file=os.path.join(model_path, _VAD_MEAN_STDDEV_FILE),
    langid_model_file=os.path.join(model_path, _LANGID_MODEL_FILE),
)


def predict(enroll_audio, test_audio):
    """Return a message naming the language predicted for ``enroll_audio``.

    Args:
        enroll_audio: Path to the audio file to classify, as supplied by the
            first ``gr.Audio(type="filepath")`` input.
        test_audio: Value of the second audio input. It is accepted so the
            function matches the two-input interface, but it is not used.

    Returns:
        ``"Predicted language:"`` followed by the top language reported by
        the runner, or a prompt asking for audio when none was supplied.
    """
    # An empty audio input is expected to arrive as a falsy value (None);
    # avoid handing that to the runner, which needs a real file path.
    if not enroll_audio:
        return "Please provide an audio clip in the first input."

    top_lang, _ = runner.wav_to_lang(enroll_audio)
    return "Predicted language:" + top_lang


if __name__ == "__main__":
    demo = gr.Interface(
        fn=predict,
        # Two audio inputs are kept to match predict's signature; only the
        # first one is classified.
        inputs=[gr.Audio(type="filepath"), gr.Audio(type="filepath")],
        outputs="text",
        title=title,
        description=description,
    )
    demo.launch()