"""Gradio demo: streaming French speech recognition with a wav2vec2 model."""

from typing import Optional, Tuple

import gradio as gr
from transformers import pipeline

# Hugging Face Hub id of the fine-tuned French wav2vec2 checkpoint.
MODEL_ID = "bhuang/wav2vec2-xls-r-1b-cv9-fr"

# Values forwarded to the pipeline's `chunk_length_s` / `stride_length_s`
# arguments on every call.
CHUNK_LENGTH_S = 5
STRIDE_LENGTH_S = 1

# Built once at import time so every streaming callback reuses the same pipeline.
pipe = pipeline(model=MODEL_ID)


def transcribe(audio: Optional[str], state: Optional[str] = "") -> Tuple[str, str]:
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Path to the recorded audio file (the Audio input is configured
            with ``type="filepath"``). May be ``None`` if the callback fires
            without a recording; the transcript is then returned unchanged.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(textbox_value, new_state)`` pair; both items are the updated
        transcript, matching the interface's two outputs.
    """
    # Normalise a missing state so the concatenation below cannot fail.
    state = state or ""

    # Nothing to transcribe: avoid handing None to the pipeline, which would raise.
    if audio is None:
        return state, state

    text = pipe(
        audio,
        chunk_length_s=CHUNK_LENGTH_S,
        stride_length_s=STRIDE_LENGTH_S,
    )["text"]

    # Skip empty results so silent chunks do not pile up blank separators.
    if text:
        state += text + " "
    return state, state


# streaming mode
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            source="microphone",
            type="filepath",
            streaming=True,
            label="Record something...",
        ),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    title="Realtime ASR in French",
    # description="Realtime demo for French ASR using a fine-tuned wav2vec2 model.",
    allow_flagging="never",
    live=True,
)

iface.launch()