Spaces:

akhaliq
/

s2t-wav2vec2-large-en-de

Runtime error

App Files Files Community

Ahsen Khaliq committed on Sep 11, 2021

Commit

a4f8f10

1 Parent(s): a5805fe

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -1,18 +1,26 @@
 import soundfile as sf
 import gradio as gr
 import torch
-from transformers import pipeline
-asr = pipeline("automatic-speech-recognition", model="facebook/s2t-wav2vec2-large-en-de", feature_extractor="facebook/s2t-wav2vec2-large-en-de")
 def inference(audio):
-  translation_de = asr(audio.name)
-  return translation_de[0]
 inputs = gr.inputs.Audio(label="Input Audio", type="file")
 outputs =  gr.outputs.Textbox(label="Output Text")
 title = "Robust wav2vec 2.0"
 description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"
-gr.Interface(inference, inputs, outputs, title=title, description=description, article=article).launch()

+import os
+os.system('pip freeze')  # list installed packages before the upgrade
+os.system('pip install transformers --upgrade')  # runs at startup, before transformers is imported further down
+os.system('pip freeze')  # list installed packages again after the upgrade
 import soundfile as sf
 import gradio as gr
 import torch
+from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel
+# Load the checkpoint and its processor once at import time so every request reuses them.
+# The encoder-decoder class exported by transformers is SpeechEncoderDecoderModel.
+model = SpeechEncoderDecoderModel.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
+processor = Speech2Text2Processor.from_pretrained("facebook/s2t-wav2vec2-large-en-de")
+def map_to_array(file):
+   """Read an audio file with soundfile and return its samples as a 1-D array."""
+   # The file's own sample rate is discarded here; inference() passes a fixed sampling_rate instead.
+   speech, _ = sf.read(file)
+   # Multi-channel files come back as (frames, channels); average the channels so the
+   # processor always receives a single mono waveform rather than a 2-D array.
+   if speech.ndim > 1:
+      speech = speech.mean(axis=1)
+   return speech
 def inference(audio):
+  """Run the uploaded audio file through the model and return the first decoded string."""
+  # Only audio.name (the path of the uploaded file) is read from the Gradio file object.
+  inputs = processor(map_to_array(audio.name), sampling_rate=16_000, return_tensors="pt")
+  # The processor stores the audio tensor under "input_values" ("input_features" is not a key
+  # it returns). Pass it positionally so the call works whether generate() names its first
+  # parameter `input_ids` or `inputs`.
+  generated_ids = model.generate(inputs["input_values"], attention_mask=inputs["attention_mask"])
+  transcription = processor.batch_decode(generated_ids)
+  return transcription[0]
 inputs = gr.inputs.Audio(label="Input Audio", type="file")
 outputs =  gr.outputs.Textbox(label="Output Text")
 title = "Robust wav2vec 2.0"
 description = "Gradio demo for Robust wav2vec 2.0. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav and .flac files"
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2104.01027' target='_blank'>Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training</a> | <a href='https://github.com/pytorch/fairseq' target='_blank'>Github Repo</a></p>"
+examples=[['poem.wav']]  # one example row with one audio input; presumably poem.wav ships alongside app.py - verify
+gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()  # build the UI around inference() and start it