Update app.py
app.py
CHANGED
@@ -23,15 +23,12 @@ def ASR(audio):
     # waveform = resampler(waveform)
     # Convert the PyTorch tensor to a NumPy ndarray
     # Preprocess the audio file
-    input_values = processor(waveform.squeeze(
+    input_values = processor(waveform.squeeze().numpy(), sampling_rate=16_000, return_tensors="pt").input_values
     # Transcribe the audio file
     with torch.no_grad():
         logits = model(input_values).logits
-    # audio_array = waveform.numpy()
-    # inputs = processor(audio_array, sampling_rate=16_000)
-    # text = pipe(np.array(inputs))
     # Decode the transcription
-    transcription = processor.decode(torch.argmax(logits
+    transcription = processor.decode(torch.argmax(logits, dim=-1))
     return transcription
 iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
 iface.launch(share=False)