File size: 1,546 Bytes
d9d94b1
 
cb9846d
241d532
cb9846d
30214b5
d72f733
 
c94a411
cb9846d
d9d94b1
3a921e4
d9d94b1
 
 
3a921e4
d9d94b1
 
 
 
 
 
 
1180c79
d9d94b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb9846d
 
 
d9d94b1
 
 
 
 
cb9846d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import io
import os
import gradio as gr
from google.cloud import speech


# Point the Google Cloud client libraries at the service-account key file that
# is expected to sit in the current working directory. os.path.join is used
# instead of a hand-written '/' so the separator is always correct for the
# host platform (and no doubled separator appears when the cwd is a root).
rutas = [os.getcwd(), "deploygpt-e9475e7c2c7c.json"]
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.join(*rutas)


def transcribe(file_name):
    """Transcribe an audio file to text using Google Cloud Speech-to-Text.

    Args:
        file_name: Path of the audio file to transcribe. The recognition
            request is configured for LINEAR16 encoding, a single audio
            channel and the "es-AR" language code.

    Returns:
        The first alternative of every recognition result, joined with single
        spaces. An empty string is returned when ``file_name`` is empty or
        ``None`` (no audio was provided) or when nothing was recognized.
    """
    # Guard clause: without a file there is nothing to send to the service.
    # (The original condition was inverted and only ran on a missing file.)
    if not file_name:
        return ''

    # Create the Speech-to-Text client.
    client = speech.SpeechClient()

    # Build the recognition request configuration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Load the raw audio bytes; the file is closed before the network call.
    with open(file_name, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)

    # Run the synchronous transcription.
    response = client.recognize(request={"config": config, "audio": audio})

    transcript = []
    for result in response.results:
        # Skip results that carry no alternatives instead of raising IndexError.
        if not result.alternatives:
            continue
        best = result.alternatives[0].transcript
        print("Transcript: {}".format(best))
        transcript.append(best)

    return ' '.join(transcript)


# Microphone-only audio input, handed to the callback as a file path.
mic_input = gr.Audio(
    sources=["microphone"],
    type="filepath",  # Creates a temporary file in WAV format
    streaming=False,
)

# Wire the microphone recording into `transcribe` and show the result as text.
demo = gr.Interface(fn=transcribe, inputs=mic_input, outputs="text")

demo.launch()