Spaces:

AllAideas
/

speech-to-text

Running

fcernafukuzaki committed on Mar 13, 2024

Commit

d9607d1

verified ·

1 Parent(s): 9163b2a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ from google.cloud import speech_v1
 from google.protobuf import timestamp_pb2
-def transcribe(audio_bytes):
     """Transcribe audio bytes to text using Google Cloud Speech to Text."""
     # Crea un cliente de Speech to Text
@@ -19,19 +19,24 @@ def transcribe(audio_bytes):
     audio = speech_v1.RecognitionAudio(content=audio_bytes)
     request = speech_v1.RecognizeSpeechRequest(config=config, audio=audio)
-    # Realiza la transcripción
-    response = client.recognize_speech(request)
-    # Extrae el texto transcrito
-    transcript = response.results[0].alternatives[0].transcript
-    return transcript
 demo = gr.Interface(
     transcribe,
-    gr.Audio(sources=["microphone"], streaming=True),
-    "text",
     live=True,
 )

 from google.protobuf import timestamp_pb2
+def transcribe(stream, audio_bytes):
     """Transcribe audio bytes to text using Google Cloud Speech to Text."""
     # Crea un cliente de Speech to Text
     audio = speech_v1.RecognitionAudio(content=audio_bytes)
     request = speech_v1.RecognizeSpeechRequest(config=config, audio=audio)
+    sr, y = audio_bytes
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+    if stream is not None:
+        # Realiza la transcripción
+        response = client.recognize_speech(request)
+        # Extrae el texto transcrito
+        transcript = response.results[0].alternatives[0].transcript
+    else:
+        stream = y
+    return stream, transcript
 demo = gr.Interface(
     transcribe,
+    ["state", gr.Audio(sources=["microphone"], streaming=True)],
+    ["state", "text"],
     live=True,
 )