Spaces:

AllAideas
/

speech-to-text

Running

App Files Files Community

fcernafukuzaki committed on Mar 13, 2024

Commit

d9d94b1

verified ·

1 Parent(s): 60ff89a

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -79

app.py CHANGED Viewed

@@ -1,96 +1,51 @@
 import gradio as gr
-from transformers import pipeline
-import numpy as np
-#from google.cloud import speech_v1
 from google.cloud import speech
-from google.protobuf import timestamp_pb2
-import io
-import os
-"""Lista los archivos en la carpeta de ejecución."""
-archivos = os.listdir()
-print("\n".join(archivos))
-print(os.getcwd())
 rutas = [os.getcwd(),"deploygpt-e9475e7c2c7c.json"]
-print('/'.join(rutas))
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/'.join(rutas)
-transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
-def transcribe(audio_bytes):
-    print(type(audio_bytes))
     """Transcribe audio bytes to text using Google Cloud Speech to Text."""
-    sr, y = audio_bytes
-    y = y.astype(np.float32)
-    y /= np.max(np.abs(y))
-    return transcriber({"sampling_rate": sr, "raw": y})["text"]
-def transcribe_2(audio_bytes):
-    """Transcribe audio bytes to text using Google Cloud Speech to Text."""
-    # Crea un cliente de Speech to Text
-    #client = speech_v1.SpeechClient()
-    client = speech.SpeechClient()
-    # Configura la configuración de la solicitud
-    #config = speech_v1.RecognitionConfig()
-    #config.language_code = "es-AR"
-    #config.encoding = speech_v1.RecognitionConfig.Encoding.LINEAR16
-    #config.sample_rate_hertz = 16000
-    config = speech.RecognitionConfig(
-        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
-        enable_automatic_punctuation=True,
-        audio_channel_count=1,
-        language_code="es-AR",
-    )
-    # Crea una solicitud de reconocimiento de audio
-    #audio = speech_v1.RecognitionAudio(content=audio_bytes)
-    #request = speech_v1.RecognizeSpeechRequest(config=config, audio=audio)
-    print(f"{type(audio_bytes)} {audio_bytes}")
-    file_name = audio_bytes
-    #sr, y = audio_bytes
-    #print(f"{type(sr)} {sr}")
-    #print(type(y))
-    #y = y.astype(np.float32)
-    #y /= np.max(np.abs(y))
-    #import scipy.io.wavfile as wav
-    #RATE = sr
-    #numpydata = y
-    #file_name = 'out.wav'
-    #wav.write(file_name, RATE, numpydata)
-    #the path of your audio file
-    with io.open(file_name, "rb") as audio_file:
-        content = audio_file.read()
-        audio = speech.RecognitionAudio(content=content)
-    #audio = speech.RecognitionAudio(content=audio_bytes)
-    # Sends the request to google to transcribe the audio
-    response = client.recognize(request={"config": config, "audio": audio})
-    transcript = []
-    # Reads the response
-    for result in response.results:
-        print("Transcript: {}".format(result.alternatives[0].transcript))
-        transcript.append(result.alternatives[0].transcript)
-    # Realiza la transcripción
-    #response = client.recognize_speech(request)
-    # Extrae el texto transcrito
-    #transcript = response.results[0].alternatives[0].transcript
-    return transcript
 demo = gr.Interface(
-    transcribe_2,
-    gr.Audio(sources=["microphone"], type="filepath", streaming=False),
-    "text",
-    #live=True, # No muestra el botón de Submit.
 )
 demo.launch()

+import io
+import os
 import gradio as gr
 from google.cloud import speech
 rutas = [os.getcwd(),"deploygpt-e9475e7c2c7c.json"]
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/'.join(rutas)
def transcribe(file_name):
    """Transcribe a recorded audio file with Google Cloud Speech-to-Text.

    Args:
        file_name: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "nothing was recorded"
            (presumably what the UI passes when the user submits without
            recording; confirm against the Gradio version in use).

    Returns:
        The best transcript of every recognized segment, joined with single
        spaces. An empty string when no file was supplied or nothing was
        recognized.
    """
    # Guard clause: without a file there is nothing to send, so skip the
    # API call entirely instead of trying to open a missing path.
    if not file_name:
        return ''

    # Create the Speech-to-Text client.
    client = speech.SpeechClient()

    # Recognition settings: 16-bit linear PCM, mono, Argentinian Spanish,
    # with automatic punctuation in the returned text.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Read the whole recording; only the read needs the file to be open.
    with open(file_name, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)

    # Run the synchronous recognition request.
    response = client.recognize(request={"config": config, "audio": audio})

    transcript = []
    for result in response.results:
        # Skip segments that carry no alternative rather than raising
        # IndexError on alternatives[0].
        if not result.alternatives:
            continue
        best = result.alternatives[0].transcript
        print("Transcript: {}".format(best))
        transcript.append(best)
    return ' '.join(transcript)
 demo = gr.Interface(
+    transcribe,
+    gr.Audio(sources=["microphone"],
+             type="filepath", # Crea un archivo temporal en formato wav
+             streaming=False),
+    "text"
 )
 demo.launch()