# Hugging Face Space (status: Running) — Gradio speech-to-text demo.
import gradio as gr
import numpy as np
from google.cloud import speech_v1
from google.protobuf import timestamp_pb2
from transformers import pipeline
# Previous implementation: local Whisper pipeline (kept for reference).
#transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
#def transcribe(audio_bytes):
#    """Transcribe audio bytes to text using a local Whisper pipeline."""
#    sr, y = audio_bytes
#    y = y.astype(np.float32)
#    y /= np.max(np.abs(y))
#    return transcriber({"sampling_rate": sr, "raw": y})["text"]
def transcribe(audio_bytes):
    """Transcribe audio to text using Google Cloud Speech-to-Text.

    Accepts either raw LINEAR16 PCM bytes or the ``(sample_rate, samples)``
    numpy tuple that ``gr.Audio`` produces by default.

    Returns the first transcript alternative, or "" when no speech was
    recognized or no audio was provided.
    """
    if audio_bytes is None:
        return ""

    sample_rate = 16000
    if isinstance(audio_bytes, tuple):
        # gr.Audio(type="numpy") yields (sample_rate, samples); convert the
        # samples to 16-bit PCM bytes, which is what LINEAR16 expects.
        sample_rate, samples = audio_bytes
        samples = np.asarray(samples)
        if samples.ndim > 1:
            # Downmix multi-channel recordings to mono.
            samples = samples.mean(axis=1)
        if samples.dtype != np.int16:
            samples = samples.astype(np.float32)
            peak = np.max(np.abs(samples))
            if peak > 0:  # guard against all-silence input (division by zero)
                samples /= peak
            samples = (samples * 32767).astype(np.int16)
        content = samples.tobytes()
    else:
        content = audio_bytes

    # Create a Speech-to-Text client.
    client = speech_v1.SpeechClient()
    # Configure the recognition request.
    config = speech_v1.RecognitionConfig(
        language_code="es-ES",
        # The enum lives under AudioEncoding, not Encoding.
        encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
    )
    audio = speech_v1.RecognitionAudio(content=content)

    # RecognizeSpeechRequest / recognize_speech do not exist in the
    # google-cloud-speech client; the correct call is SpeechClient.recognize.
    response = client.recognize(config=config, audio=audio)

    # results is empty when no speech was detected; avoid an IndexError.
    if not response.results:
        return ""
    return response.results[0].alternatives[0].transcript
# Build the Gradio UI: record from the microphone, transcribe on submit.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], streaming=False),
    outputs="text",
    # live=True would transcribe continuously and hide the Submit button.
)
demo.launch()