Spaces:
Running
Running
File size: 2,545 Bytes
d9d94b1 ae874ba cb9846d 636fe04 2c6849a cb9846d 2c6849a ae874ba 2c6849a ae874ba 2c6849a bdb0292 2c6849a cb9846d 636fe04 2c6849a 636fe04 bdb0292 2c6849a bdb0292 636fe04 bdb0292 4de176d 2c6849a 4de176d 2c6849a 4de176d 2c6849a cb9846d bdb0292 7634404 e04c18e cb9846d bdb0292 2c6849a cb9846d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import io
import os
import subprocess
import wave

import gradio as gr
from google.api_core.client_options import ClientOptions
from google.cloud import speech
# Read the Google API key from the environment; abort startup with a clear
# message when it has not been configured.
API_KEY = os.environ.get("GOOGLE_API_KEY")
if API_KEY is None:
    raise ValueError("La API Key de Google no está disponible. Configúrala en los Secrets como 'GOOGLE_API_KEY'.")

# Build the Google Speech-to-Text client, authenticated with the API key.
client_options = ClientOptions(api_key=API_KEY)
client = speech.SpeechClient(client_options=client_options)
def convert_to_wav(input_file):
    """Convert an audio file to mono 16-bit PCM WAV at 44.1 kHz using ffmpeg.

    The result is written to ``input_file + ".wav"``; an existing file at
    that path is overwritten (ffmpeg ``-y``).

    Args:
        input_file: Path of the audio file to convert.

    Returns:
        Path of the converted WAV file.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
            status (for example, missing or undecodable input).
    """
    output_file = input_file + ".wav"
    command = [
        "ffmpeg", "-y", "-i", input_file,
        "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "1", output_file,
    ]
    try:
        # check=True surfaces ffmpeg failures here instead of letting the
        # caller trip over a missing output file later; stderr is captured
        # so the failure reason can be reported.
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
        )
    except OSError as err:
        # Raised when the ffmpeg executable itself cannot be launched.
        raise RuntimeError(f"No se pudo ejecutar ffmpeg: {err}") from err
    except subprocess.CalledProcessError as err:
        detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        # Keep only the tail of ffmpeg's output: the banner is long and the
        # actual error is at the end.
        raise RuntimeError(
            f"ffmpeg no pudo convertir {input_file!r}: {detail[-500:]}"
        ) from err
    return output_file
def _is_linear16_mono_44k(path):
    """Return True if *path* is a WAV file holding 16-bit mono PCM at 44.1 kHz."""
    try:
        with wave.open(path, "rb") as wav:
            return (
                wav.getnchannels() == 1
                and wav.getsampwidth() == 2
                and wav.getframerate() == 44100
                and wav.getcomptype() == "NONE"
            )
    except (wave.Error, EOFError, OSError):
        # Not a readable PCM WAV file; treat it as needing conversion.
        return False


def transcribe(audio_file=None):
    """Transcribe an audio file to text with Google Cloud Speech-to-Text.

    The audio is sent as LINEAR16, 44100 Hz, mono, with language ``es-AR``.
    Any input that is not already a WAV file in exactly that format is first
    converted with ``convert_to_wav``; the converted temporary file is
    removed once its bytes have been read.

    Args:
        audio_file: Path of the audio file, or ``None`` when nothing was
            selected.

    Returns:
        A ``(transcript, confidence)`` tuple of strings: the transcripts of
        all results joined with spaces, and the confidence of each result's
        first alternative, one per line. When ``audio_file`` is ``None`` the
        tuple is a "no file selected" message and an empty string.
    """
    if audio_file is None:
        return "No se ha seleccionado ningún archivo.", ""

    # Decide by the actual WAV header rather than the file extension: a
    # ".wav" recorded at another sample rate or in stereo would not match
    # the fixed recognition config below.
    converted_file = None
    if not _is_linear16_mono_44k(audio_file):
        converted_file = convert_to_wav(audio_file)

    # Load the audio bytes, then drop the temporary conversion output.
    try:
        with open(converted_file or audio_file, "rb") as file:
            content = file.read()
    finally:
        if converted_file is not None:
            try:
                os.remove(converted_file)
            except OSError:
                # Best-effort cleanup; the file may never have been created.
                pass

    # Request configuration; must describe the audio produced above.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        audio_channel_count=1,
        language_code="es-AR",
    )
    audio = speech.RecognitionAudio(content=content)

    # NOTE(review): recognize() is the synchronous API; Google documents a
    # duration limit for it (about one minute) — confirm for long uploads.
    response = client.recognize(config=config, audio=audio)

    transcript = []
    confidence = []
    for result in response.results:
        if not result.alternatives:
            # Nothing recognized for this segment; skip instead of failing.
            continue
        best = result.alternatives[0]
        transcript.append(best.transcript)
        confidence.append(str(best.confidence))
    return " ".join(transcript), "\n".join(confidence)
# Gradio interface: two text outputs (transcript and confidence) fed by a
# single audio input that accepts microphone recordings or uploaded files.
output1 = gr.Textbox(label='Transcripción')
output2 = gr.Textbox(label='Confianza')
audio_input = gr.Audio(
    sources=["microphone", "upload"],
    type="filepath",
    label="Subir o grabar audio",
)

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=[output1, output2],
    title='Demo Speech-to-Text con Google Cloud',
    description='<p>Grabar o subir un archivo de audio para convertir voz a texto usando Google Cloud Speech-to-Text.</p>',
)

# Start the web app.
demo.launch()
|