import whisper
import gradio as gr


# The high-level model.transcribe() API is an alternative (see the sketch at
# the end of this file), but this block detects the spoken language explicitly
# and provides lower-level access to the model.
def transcribe(aud_inp):
    if aud_inp is None:
        return ''
    # Loading the model once at module level would avoid reloading it on
    # every call; it is kept here to stay close to the original structure.
    model = whisper.load_model('base')

    # Load the audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(aud_inp)
    audio = whisper.pad_or_trim(audio)

    # Make the log-Mel spectrogram and move it to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language
    _, probs = model.detect_language(mel)
    print(f'Detected language: {max(probs, key=probs.get)}')

    # Decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    print(result.text)
    return result.text


def run():
    # type='filepath' hands transcribe() the path whisper.load_audio() expects.
    # Gradio 4+ takes sources=[...]; 3.x releases use source='microphone'.
    demo = gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(sources=['microphone'], type='filepath'),
        outputs='text',
    )
    demo.launch(server_name='0.0.0.0', server_port=7860)


if __name__ == '__main__':
    run()
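
# A lighter-weight alternative, sketched here for reference: whisper's
# high-level model.transcribe() performs the load/pad, spectrogram,
# language-detection, and decoding steps above in a single call. The helper
# name transcribe_simple is ours, not part of whisper; load_model() and
# transcribe() are the openai-whisper package's own API, and the returned
# dict carries 'text', 'segments', and 'language' keys.
#
# def transcribe_simple(aud_inp):
#     model = whisper.load_model('base')
#     result = model.transcribe(aud_inp)
#     print(f"Detected language: {result['language']}")
#     return result['text']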