Spaces:

AllAideas
/

speech-to-text

Running

App Files Files Community

cesar committed on Feb 3

Commit

636fe04

verified ·

1 Parent(s): cbcfe06

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -8

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
-import io
 import os
 import gradio as gr
 from google.cloud import speech
 from google.api_core.client_options import ClientOptions
@@ -11,11 +12,25 @@ API_KEY = os.getenv("GOOGLE_API_KEY")
 if not API_KEY:
     raise ValueError("La API Key de Google no está configurada. Configúrala en la variable de entorno GOOGLE_API_KEY.")
-def transcribe(file_name):
     """Transcribe audio a texto usando Google Cloud Speech-to-Text con API Key."""
-    if file_name is None:
         return '', ''
     # Configurar el cliente de Speech-to-Text con API Key
     client_options = ClientOptions(api_key=API_KEY)
     client = speech.SpeechClient(client_options=client_options)
@@ -29,7 +44,7 @@ def transcribe(file_name):
     )
     # Cargar el audio en binario
-    with io.open(file_name, "rb") as audio_file:
         content = audio_file.read()
         audio = speech.RecognitionAudio(content=content)
@@ -46,16 +61,19 @@ def transcribe(file_name):
     return ' '.join(transcript), '\n'.join(confidence)
-# Configuración de la interfaz Gradio
 output1 = gr.Textbox(label='Transcripción')
 output2 = gr.Textbox(label='Confianza')
 demo = gr.Interface(
     transcribe,
-    gr.Audio(sources=["microphone"], type="filepath", label='Grabar audio aquí', streaming=False),
     [output1, output2],
-    title='Demo Reconocimiento de voz',
-    description='<p>Grabar audio para convertir voz a texto usando IA.</p>'
 )
 demo.launch()

 import os
+import io
 import gradio as gr
+import subprocess
 from google.cloud import speech
 from google.api_core.client_options import ClientOptions
 if not API_KEY:
     raise ValueError("La API Key de Google no está configurada. Configúrala en la variable de entorno GOOGLE_API_KEY.")
+def convert_to_wav(input_file):
+    """Convierte archivos de audio a formato WAV LINEAR16 si es necesario."""
+    output_file = input_file + ".wav"
+    command = [
+        "ffmpeg", "-y", "-i", input_file,
+        "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "1", output_file
+    ]
+    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    return output_file
+def transcribe(file_path):
     """Transcribe audio a texto usando Google Cloud Speech-to-Text con API Key."""
+    if file_path is None:
         return '', ''
+    # Convertir a formato WAV si es necesario
+    if not file_path.endswith(".wav"):
+        file_path = convert_to_wav(file_path)
     # Configurar el cliente de Speech-to-Text con API Key
     client_options = ClientOptions(api_key=API_KEY)
     client = speech.SpeechClient(client_options=client_options)
     )
     # Cargar el audio en binario
+    with io.open(file_path, "rb") as audio_file:
         content = audio_file.read()
         audio = speech.RecognitionAudio(content=content)
     return ' '.join(transcript), '\n'.join(confidence)
+# Configuración de la interfaz Gradio con opciones de grabación y subida de archivos
 output1 = gr.Textbox(label='Transcripción')
 output2 = gr.Textbox(label='Confianza')
 demo = gr.Interface(
     transcribe,
+    [
+        gr.Audio(sources=["microphone"], type="filepath", label='Grabar audio aquí', streaming=False),
+        gr.File(label="Subir archivo de audio")
+    ],
     [output1, output2],
+    title='Demo Reconocimiento de Voz con Google',
+    description='<p>Grabar o subir un archivo de audio para convertir voz a texto usando Google Cloud Speech-to-Text.</p>'
 )
 demo.launch()