Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
import numpy as np
|
3 |
-
from google.cloud import speech_v1
|
4 |
-
from google.protobuf import timestamp_pb2
|
5 |
|
|
|
6 |
|
7 |
def transcribe(stream, audio_bytes):
|
8 |
"""Transcribe audio bytes to text using Google Cloud Speech to Text."""
|
@@ -11,27 +13,10 @@ def transcribe(stream, audio_bytes):
|
|
11 |
y = y.astype(np.float32)
|
12 |
y /= np.max(np.abs(y))
|
13 |
if stream is not None:
|
14 |
-
|
15 |
-
client = speech_v1.SpeechClient()
|
16 |
-
|
17 |
-
# Configura la configuración de la solicitud
|
18 |
-
config = speech_v1.RecognitionConfig()
|
19 |
-
config.language_code = "es-ES"
|
20 |
-
config.encoding = speech_v1.RecognitionConfig.Encoding.LINEAR16
|
21 |
-
config.sample_rate_hertz = 16000
|
22 |
-
|
23 |
-
# Crea una solicitud de reconocimiento de audio
|
24 |
-
audio = speech_v1.RecognitionAudio(content=audio_bytes)
|
25 |
-
request = speech_v1.RecognizeSpeechRequest(config=config, audio=audio)
|
26 |
-
|
27 |
-
# Realiza la transcripción
|
28 |
-
response = client.recognize_speech(request)
|
29 |
-
|
30 |
-
# Extrae el texto transcrito
|
31 |
-
transcript = response.results[0].alternatives[0].transcript
|
32 |
else:
|
33 |
stream = y
|
34 |
-
return stream,
|
35 |
|
36 |
|
37 |
demo = gr.Interface(
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
import numpy as np
|
4 |
+
#from google.cloud import speech_v1
|
5 |
+
#from google.protobuf import timestamp_pb2
|
6 |
|
7 |
+
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
|
8 |
|
9 |
def transcribe(stream, audio_bytes):
|
10 |
"""Transcribe the accumulated audio stream to text using the Whisper ASR pipeline."""
|
|
|
13 |
y = y.astype(np.float32)
|
14 |
y /= np.max(np.abs(y))
|
15 |
if stream is not None:
|
16 |
+
stream = np.concatenate([stream, y])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
else:
|
18 |
stream = y
|
19 |
+
return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
|
20 |
|
21 |
|
22 |
demo = gr.Interface(
|