import os

import gradio as gr
import numpy as np
from google.cloud import speech
from transformers import pipeline

# List the files in the working directory (useful to confirm the
# service-account key file was deployed alongside the app).
print("\n".join(os.listdir()))
print(os.getcwd())

# Point the Google Cloud client at the service-account key in this directory.
credentials_path = os.path.join(os.getcwd(), "deploygpt-e9475e7c2c7c.json")
print(credentials_path)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

# Local Whisper pipeline, kept as an offline alternative to the Cloud API.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(audio):
    """Transcribe a (sample_rate, samples) tuple locally with Whisper."""
    sr, y = audio
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))  # normalize to [-1, 1], as the pipeline expects
    return transcriber({"sampling_rate": sr, "raw": y})["text"]


def transcribe_2(audio_path):
    """Transcribe an audio file using Google Cloud Speech-to-Text."""
    # Create a Speech-to-Text client.
    client = speech.SpeechClient()

    # Configure the recognition request.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Gradio passes the recording as a file path (type="filepath" below),
    # so read the raw bytes from disk.
    with open(audio_path, "rb") as audio_file:
        audio = speech.RecognitionAudio(content=audio_file.read())

    # Send the request to Google to transcribe the audio.
    response = client.recognize(request={"config": config, "audio": audio})

    # Collect the top alternative of every result into a single string.
    transcript = []
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        transcript.append(result.alternatives[0].transcript)
    return " ".join(transcript)


demo = gr.Interface(
    transcribe_2,
    gr.Audio(sources=["microphone"], type="filepath", streaming=False),
    "text",
    # live=True would remove the Submit button.
)

demo.launch()
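# A minimal sketch of an alternative wiring, left commented out (`demo_local`
# is a hypothetical name, not part of the deployed app): `transcribe` consumes
# the (sample_rate, numpy_array) tuple Gradio emits with type="numpy", so the
# same UI could run fully offline on the local Whisper pipeline instead of the
# Google Cloud API:
#
# demo_local = gr.Interface(
#     transcribe,
#     gr.Audio(sources=["microphone"], type="numpy", streaming=False),
#     "text",
# )
# demo_local.launch()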