File size: 1,980 Bytes
d9d94b1
 
cb9846d
241d532
e04c18e
cb9846d
e04c18e
 
30214b5
e04c18e
 
 
cb9846d
d9d94b1
e04c18e
4de176d
a7dbc3a
3a921e4
e04c18e
 
 
4de176d
e04c18e
4de176d
 
0ce2c27
4de176d
 
 
 
e04c18e
4de176d
 
 
1180c79
4de176d
e04c18e
d9d94b1
4de176d
ea9a51d
e04c18e
 
4de176d
2a119dd
4de176d
 
ea9a51d
cb9846d
e04c18e
7634404
e04c18e
 
cb9846d
d9d94b1
e04c18e
7634404
cbcfe06
 
cb9846d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import io
import os
import wave

import gradio as gr
from google.api_core.client_options import ClientOptions
from google.cloud import speech

# Read the Google API key from the process environment.
API_KEY = os.environ.get("GOOGLE_API_KEY")

# Fail fast at import time when the key is missing or empty, instead of
# failing later on the first transcription request.
if API_KEY is None or API_KEY == "":
    raise ValueError(
        "La API Key de Google no está configurada. Configúrala en la variable de entorno GOOGLE_API_KEY."
    )

def _wav_format(content, default_rate=44100, default_channels=1):
    """Return ``(sample_rate_hertz, channel_count)`` read from WAV bytes.

    Falls back to the supplied defaults when *content* cannot be parsed as a
    WAV file by the standard ``wave`` module.
    """
    try:
        with wave.open(io.BytesIO(content), "rb") as wav:
            return wav.getframerate(), wav.getnchannels()
    except (wave.Error, EOFError):
        return default_rate, default_channels


def transcribe(file_name):
    """Transcribe an audio file to text with Google Cloud Speech-to-Text.

    Args:
        file_name: Path of the audio file to transcribe, or ``None`` when no
            audio was provided.

    Returns:
        A ``(transcript, confidence)`` tuple of strings: the best transcript
        of every result joined with spaces, and the matching confidence
        values joined with newlines. Both are empty strings when
        ``file_name`` is ``None`` or the API returns no results.

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    if file_name is None:
        return '', ''

    # Load the raw audio bytes once; they feed both the header probe and the
    # recognition request.
    with io.open(file_name, "rb") as audio_file:
        content = audio_file.read()

    # Use the sample rate and channel count stored in the WAV header rather
    # than fixed values: the recording rate depends on the capture device,
    # and a configured rate that disagrees with the header makes the request
    # fail. Non-WAV input keeps the previous fixed values (44100 Hz, mono).
    sample_rate, channels = _wav_format(content)

    # Build the Speech-to-Text client authenticated with the API key.
    client_options = ClientOptions(api_key=API_KEY)
    client = speech.SpeechClient(client_options=client_options)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        audio_channel_count=channels,
        language_code="es-AR",
    )
    audio = speech.RecognitionAudio(content=content)

    response = client.recognize(config=config, audio=audio)

    transcript = []
    confidence = []
    for result in response.results:
        # Skip results that carry no alternatives instead of raising
        # IndexError on the [0] lookup.
        if not result.alternatives:
            continue
        best = result.alternatives[0]
        confidence.append(str(best.confidence))
        transcript.append(best.transcript)

    return ' '.join(transcript), '\n'.join(confidence)

# Gradio UI: one microphone input wired to `transcribe`, with two text
# outputs (the transcript and the per-result confidence values).
output1 = gr.Textbox(label='Transcripción')
output2 = gr.Textbox(label='Confianza')

demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label='Grabar audio aquí',
        streaming=False,
    ),
    outputs=[output1, output2],
    title='Demo Reconocimiento de voz',
    description='<p>Grabar audio para convertir voz a texto usando IA.</p>',
)

# Start the web server for the interface.
demo.launch()