import os

import gradio as gr
import numpy as np
from google.cloud import speech
"""Lista los archivos en la carpeta de ejecución."""
archivos = os.listdir()
print("\n".join(archivos))
print(os.getcwd())
rutas = [os.getcwd(),"deploygpt-e9475e7c2c7c.json"]
print('/'.join(rutas))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/'.join(rutas)

# Earlier approach, kept for reference: run a local Whisper model instead of the
# cloud API, e.g.
#   transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
# then normalize the samples and return transcriber({"sampling_rate": sr, "raw": y})["text"].

def transcribe(audio_bytes):
    """Transcribe audio from Gradio's (sample_rate, samples) tuple with Google Cloud Speech-to-Text."""
    # Create a Speech-to-Text client.
    client = speech.SpeechClient()

    # Gradio's Audio component delivers a (sample_rate, numpy array) tuple.
    sr, y = audio_bytes

    # Normalize the samples to [-1, 1], guarding against an all-silent recording.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # LINEAR16 expects raw 16-bit PCM, so convert the float samples back to int16 bytes.
    pcm_bytes = (y * 32767.0).astype(np.int16).tobytes()

    # Build the recognition request configuration.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=int(sr),
        audio_channel_count=1 if y.ndim == 1 else y.shape[1],
        enable_automatic_punctuation=True,
        language_code="es-AR",
    )
    audio = speech.RecognitionAudio(content=pcm_bytes)

    # Send the synchronous recognition request to Google and read the response.
    response = client.recognize(request={"config": config, "audio": audio})
    transcript = []
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        transcript.append(result.alternatives[0].transcript)

    # Join the per-result chunks so the Gradio text output receives a single string.
    return " ".join(transcript)
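
# A minimal offline smoke test for transcribe(), assuming a short WAV recording;
# the filename "sample_es.wav" is a placeholder and not part of the Space.
# Uncomment to exercise the cloud transcription path without launching the UI:
#
#   from scipy.io import wavfile
#   rate, samples = wavfile.read("sample_es.wav")
#   print(transcribe((rate, samples)))
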
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"], streaming=False),
    "text",
    # live=True,  # Hides the Submit button.
)
demo.launch()