from transformers import pipeline
import gradio as gr
import time

# Load the Russian Whisper model as an ASR pipeline running on CPU
pipe = pipeline(
    model="dvislobokov/whisper-large-v3-turbo-russian",
    tokenizer="dvislobokov/whisper-large-v3-turbo-russian",
    task='automatic-speech-recognition',
    device='cpu'
)

def transcribe(audio):
    # Transcribe the audio file and measure how long inference takes
    start = time.time()
    text = pipe(audio, return_timestamps=True)['text']
    spent_time = time.time() - start
    return f'Spent time: {spent_time:.2f}s\nText: {text}'

# Simple Gradio UI: record from the microphone or upload a file,
# pass the audio to transcribe() as a file path, and show the text output
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=['microphone', 'upload'], type='filepath'),
    outputs='text'
)

iface.launch(share=True)