import gradio as gr
from transformers import pipeline
import numpy as np
from google.cloud import speech
import os
"""Lista los archivos en la carpeta de ejecución.""" | |
archivos = os.listdir() | |
print("\n".join(archivos)) | |
print(os.getcwd()) | |
rutas = [os.getcwd(),"deploygpt-e9475e7c2c7c.json"] | |
print('/'.join(rutas)) | |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/'.join(rutas) | |
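# Optional sanity check (an assumption, not part of the original app): fail
# fast if the key file is missing instead of hitting an opaque auth error later.
#
#   if not os.path.exists(credentials_path):
#       raise FileNotFoundError(f"Missing service-account key: {credentials_path}")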
# Local Whisper pipeline (English-only base model), kept as an offline alternative.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

def transcribe(audio):
    """Transcribe a Gradio (sample_rate, waveform) tuple locally with Whisper."""
    sr, y = audio
    # Normalize to float32 in [-1, 1], the format the pipeline expects.
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
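# NOTE (hypothetical wiring, not part of the original Space): to route the UI
# through the local Whisper function above, the Gradio input would have to
# deliver a numpy tuple rather than a file path, e.g.:
#
#   demo = gr.Interface(
#       transcribe,
#       gr.Audio(sources=["microphone"], type="numpy", streaming=False),
#       "text",
#   )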
def transcribe_2(audio_filepath):
    """Transcribe an audio file to text using Google Cloud Speech-to-Text."""
    # Create a Speech-to-Text client.
    client = speech.SpeechClient()

    # Request configuration: mono LINEAR16 (WAV) audio in Argentine Spanish,
    # with automatic punctuation. The sample rate is read from the WAV header.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Gradio hands over the recording as a path on disk (type="filepath"
    # below), so read the file in as raw bytes.
    print(f"{type(audio_filepath)} {audio_filepath}")
    with open(audio_filepath, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)

    # Send the request to Google to transcribe the audio.
    response = client.recognize(request={"config": config, "audio": audio})

    # Read the response, keeping the top alternative of each result.
    transcript = []
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        transcript.append(result.alternatives[0].transcript)

    # Join the per-result fragments so the Gradio text output gets a string.
    return " ".join(transcript)
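# NOTE: client.recognize() is the synchronous API, limited to roughly one
# minute of audio. For longer clips, google-cloud-speech offers a long-running
# variant (sketched here as an assumption; this Space does not use it):
#
#   operation = client.long_running_recognize(request={"config": config, "audio": audio})
#   response = operation.result(timeout=90)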
demo = gr.Interface(
    transcribe_2,
    gr.Audio(sources=["microphone"], type="filepath", streaming=False),
    "text",
    # live=True would auto-submit on change and hide the Submit button.
)

demo.launch()
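# NOTE: on Hugging Face Spaces the bare launch() above is all that is needed;
# when running locally, demo.launch(share=True) would additionally expose a
# temporary public URL.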