fcernafukuzaki committed on
Commit
96f7c65
·
verified ·
1 Parent(s): 086ae79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -7
app.py CHANGED
@@ -4,23 +4,47 @@ import numpy as np
4
  from google.cloud import speech_v1
5
  from google.protobuf import timestamp_pb2
6
 
7
# Whisper ASR pipeline used as the transcription backend.
# NOTE(review): `pipeline` is not imported anywhere in the visible file —
# this needs `from transformers import pipeline` at the top. TODO confirm.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(audio_bytes):
    """Transcribe a Gradio microphone capture with the Whisper pipeline.

    Parameters
    ----------
    audio_bytes : tuple[int, numpy.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio``
        in numpy mode (the tuple unpacking below relies on this shape).

    Returns
    -------
    str
        The recognized text from the Whisper pipeline.
    """
    sr, y = audio_bytes
    y = y.astype(np.float32)
    # Peak-normalize to [-1, 1]. Guard against silent or empty input:
    # the original `y /= np.max(np.abs(y))` divided by zero there,
    # filling the array with NaNs before it reached the model.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0.0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
# Gradio UI: record from the microphone, show the transcribed text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], streaming=False),
    outputs="text",
    live=True,
)

demo.launch()
 
4
  from google.cloud import speech_v1
5
  from google.protobuf import timestamp_pb2
6
 
7
+ #transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
8
+
9
+ #def transcribe(audio_bytes):
10
+ # """Transcribe audio bytes to text using Google Cloud Speech to Text."""
11
+ #
12
+ # sr, y = audio_bytes
13
+ # y = y.astype(np.float32)
14
+ # y /= np.max(np.abs(y))
15
+ #
16
+ # return transcriber({"sampling_rate": sr, "raw": y})["text"]
17
 
18
def transcribe(audio_bytes):
    """Transcribe Gradio microphone audio with Google Cloud Speech-to-Text.

    Parameters
    ----------
    audio_bytes : tuple[int, numpy.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio`` in
        numpy mode. The previous revision passed this tuple directly as
        ``RecognitionAudio(content=...)``, which expects raw PCM bytes.

    Returns
    -------
    str
        Transcript of the first recognition result, or ``""`` when the
        service returns no results (e.g. silence) — previously this
        raised ``IndexError`` on ``response.results[0]``.
    """
    sr, samples = audio_bytes

    # LINEAR16 means signed 16-bit little-endian PCM; convert whatever
    # dtype Gradio delivered. Peak-normalize floats first, guarding
    # against silent input (avoid 0/0 -> NaN).
    if samples.dtype != np.int16:
        samples = samples.astype(np.float32)
        peak = np.max(np.abs(samples)) if samples.size else 0.0
        if peak > 0.0:
            samples = samples / peak
        samples = (samples * 32767.0).astype(np.int16)
    # Fold stereo capture down to mono; the API expects one channel
    # unless audio_channel_count is configured.
    if samples.ndim > 1:
        samples = samples.mean(axis=1).astype(np.int16)

    # Create a Speech-to-Text client.
    client = speech_v1.SpeechClient()

    # Request configuration. The enum container is `AudioEncoding`
    # (`RecognitionConfig.Encoding` does not exist), and the sample rate
    # must match the actual capture rate instead of a hard-coded 16000.
    config = speech_v1.RecognitionConfig(
        language_code="es-ES",
        encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=int(sr),
    )
    audio = speech_v1.RecognitionAudio(content=samples.tobytes())

    # Perform synchronous recognition. The v1 client method is
    # `recognize`; `RecognizeSpeechRequest` / `recognize_speech` used by
    # the previous revision do not exist on SpeechClient.
    response = client.recognize(config=config, audio=audio)

    if not response.results:
        return ""
    return response.results[0].alternatives[0].transcript
41
 
42
 
43
# Build and serve the Gradio UI: microphone recording in, transcript out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], streaming=False),
    outputs="text",
    # live=True was removed: it hides the Submit button.
)

demo.launch()