Spaces:

pratikshahp
/

audio-to-text

Sleeping

pratikshahp committed on Apr 1, 2024

Commit

16d11ec

verified ·

1 Parent(s): a976d7a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,12 +3,16 @@ from transformers import Speech2TextProcessor, Speech2TextForConditionalGenerati
 from audio_recorder_streamlit import audio_recorder
 import numpy as np
 import streamlit as st
 def transcribe_audio(audio_bytes):
     model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
     processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
-    generated_ids = model.generate(input_ids=audio_bytes["input_features"], attention_mask=audio_bytes["attention_mask"])
     translation = processor.batch_decode(generated_ids, skip_special_tokens=True)
     return translation
@@ -25,4 +29,4 @@ if audio_bytes:
     else:
         st.write("Error: Failed to transcribe audio.")
 else:
-    st.write("No audio recorded.")

 from audio_recorder_streamlit import audio_recorder
 import numpy as np
 import streamlit as st
 def transcribe_audio(audio_bytes):
     model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
     processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-mustc-en-fr-st")
+    # Convert audio bytes to tensors
+    input_features = torch.tensor(audio_bytes).unsqueeze(0)  # Assuming audio_bytes is numpy array
+    # Generate transcription
+    generated_ids = model.generate(input_features)
     translation = processor.batch_decode(generated_ids, skip_special_tokens=True)
     return translation
     else:
         st.write("Error: Failed to transcribe audio.")
 else:
+    st.write("No audio recorded.")