import whisper
import gradio as gr

app = gr.Blocks()


def transcribe(aud_inp):
    if aud_inp is None:
        return ''
    # Load the Whisper model
    model = whisper.load_model('base')
    # Load the audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)
    # Make a log-Mel spectrogram and move it to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # Detect the spoken language
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')
    # Decode the audio and return the transcription
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text


def run():
    # Microphone input is passed to transcribe() as a filepath,
    # which is what whisper.load_audio() expects
    demo = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(sources=['microphone'], type='filepath'),
        outputs='text',
    )
    with app:
        demo.render()
    app.launch(server_name='0.0.0.0', server_port=7860)


if __name__ == '__main__':
    run()
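
# A minimal sketch for testing without the web UI: calling transcribe()
# directly on a local recording ('sample.wav' is a hypothetical filename,
# assuming such a file exists on disk):
#
#   text = transcribe('sample.wav')
#   print(text)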