import os

import gradio as gr
import numpy as np
from google.cloud import speech
from transformers import pipeline
"""Lista los archivos en la carpeta de ejecución."""
archivos = os.listdir()
print("\n".join(archivos))
print(os.getcwd())
rutas = [os.getcwd(),"deploygpt-e9475e7c2c7c.json"]
print('/'.join(rutas))
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '/'.join(rutas)
# Local Whisper pipeline (English-only base model) used by transcribe();
# transcribe_2() uses Google Cloud Speech-to-Text instead.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
def transcribe(audio_bytes):
    """Transcribe a Gradio (sample_rate, samples) tuple with the local Whisper pipeline."""
    sr, y = audio_bytes
    # Normalize to [-1, 1]; guard against all-zero (silent) input.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
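
# Example (hypothetical) of calling transcribe() directly, outside Gradio, on a
# local file "sample.wav"; scipy.io.wavfile is an assumed extra dependency:
#
#   import scipy.io.wavfile as wav
#   sr, y = wav.read("sample.wav")
#   print(transcribe((sr, y)))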
def transcribe_2(audio_bytes):
    """Transcribe an audio file to text using Google Cloud Speech-to-Text."""
    # Create a Speech-to-Text client.
    client = speech.SpeechClient()

    # Request configuration. For WAV input the sample rate is read from the
    # file header, so sample_rate_hertz is not set explicitly.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )
    # Gradio delivers the recording as a temporary file path (type="filepath").
    print(f"{type(audio_bytes)} {audio_bytes}")
    file_name = audio_bytes
    # Read the audio file and send the recognition request to Google.
    with open(file_name, "rb") as audio_file:
        content = audio_file.read()
    audio = speech.RecognitionAudio(content=content)
    response = client.recognize(request={"config": config, "audio": audio})
    # Collect the transcript of each result in the response.
    transcript = []
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        transcript.append(result.alternatives[0].transcript)
    # Join the pieces so the Gradio "text" output receives a single string.
    return " ".join(transcript)
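
# The original code sketched, in comments, converting Gradio's
# (sample_rate, samples) tuple to a WAV file before calling the Google API.
# A minimal sketch of that idea; the helper name and the "out.wav" default are
# illustrative, and scipy is an assumed dependency:
def numpy_audio_to_wav(audio_tuple, file_name="out.wav"):
    """Write a Gradio (sample_rate, samples) tuple to a 16-bit PCM WAV file."""
    import scipy.io.wavfile as wav
    sr, y = audio_tuple
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # avoid dividing by zero on silent input
        y /= peak
    wav.write(file_name, sr, (y * 32767).astype(np.int16))
    return file_name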
demo = gr.Interface(
    transcribe_2,
    gr.Audio(sources=["microphone"], type="filepath", streaming=False),
    "text",
    # live=True,  # live mode would remove the Submit button and run on change.
)
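# Launch the web UI; locally Gradio serves on http://127.0.0.1:7860 by default.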
demo.launch()