fcernafukuzaki committed on
Commit
96f7c65
·
verified ·
1 Parent(s): 086ae79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -7
app.py CHANGED
@@ -4,23 +4,47 @@ import numpy as np
4
  from google.cloud import speech_v1
5
  from google.protobuf import timestamp_pb2
6
 
7
# Whisper ASR pipeline used as the transcription backend.
# NOTE(review): `pipeline` is not imported anywhere in the visible file —
# this needs `from transformers import pipeline` at the top. TODO confirm.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")


def transcribe(audio_bytes):
    """Transcribe a Gradio microphone capture with the Whisper pipeline.

    Parameters
    ----------
    audio_bytes : tuple[int, numpy.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio``
        in numpy mode (the tuple unpacking below relies on this shape).

    Returns
    -------
    str
        The recognized text from the Whisper pipeline.
    """
    sr, y = audio_bytes
    y = y.astype(np.float32)
    # Peak-normalize to [-1, 1]. Guard against silent or empty input:
    # the original `y /= np.max(np.abs(y))` divided by zero there,
    # filling the array with NaNs before it reached the model.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0.0:
        y /= peak
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
# Gradio UI: record from the microphone, show the transcribed text.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], streaming=False),
    outputs="text",
    live=True,
)

demo.launch()
 
4
  from google.cloud import speech_v1
5
  from google.protobuf import timestamp_pb2
6
 
7
+ #transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
8
+
9
+ #def transcribe(audio_bytes):
10
+ # """Transcribe audio bytes to text using Google Cloud Speech to Text."""
11
+ #
12
+ # sr, y = audio_bytes
13
+ # y = y.astype(np.float32)
14
+ # y /= np.max(np.abs(y))
15
+ #
16
+ # return transcriber({"sampling_rate": sr, "raw": y})["text"]
17
 
18
def transcribe(audio_bytes):
    """Transcribe Gradio microphone audio with Google Cloud Speech-to-Text.

    Parameters
    ----------
    audio_bytes : tuple[int, numpy.ndarray]
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio`` in
        numpy mode. The previous revision passed this tuple directly as
        ``RecognitionAudio(content=...)``, which expects raw PCM bytes.

    Returns
    -------
    str
        Transcript of the first recognition result, or ``""`` when the
        service returns no results (e.g. silence) — previously this
        raised ``IndexError`` on ``response.results[0]``.
    """
    sr, samples = audio_bytes

    # LINEAR16 means signed 16-bit little-endian PCM; convert whatever
    # dtype Gradio delivered. Peak-normalize floats first, guarding
    # against silent input (avoid 0/0 -> NaN).
    if samples.dtype != np.int16:
        samples = samples.astype(np.float32)
        peak = np.max(np.abs(samples)) if samples.size else 0.0
        if peak > 0.0:
            samples = samples / peak
        samples = (samples * 32767.0).astype(np.int16)
    # Fold stereo capture down to mono; the API expects one channel
    # unless audio_channel_count is configured.
    if samples.ndim > 1:
        samples = samples.mean(axis=1).astype(np.int16)

    # Create a Speech-to-Text client.
    client = speech_v1.SpeechClient()

    # Request configuration. The enum container is `AudioEncoding`
    # (`RecognitionConfig.Encoding` does not exist), and the sample rate
    # must match the actual capture rate instead of a hard-coded 16000.
    config = speech_v1.RecognitionConfig(
        language_code="es-ES",
        encoding=speech_v1.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=int(sr),
    )
    audio = speech_v1.RecognitionAudio(content=samples.tobytes())

    # Perform synchronous recognition. The v1 client method is
    # `recognize`; `RecognizeSpeechRequest` / `recognize_speech` used by
    # the previous revision do not exist on SpeechClient.
    response = client.recognize(config=config, audio=audio)

    if not response.results:
        return ""
    return response.results[0].alternatives[0].transcript
41
 
42
 
43
# Build and serve the Gradio UI: microphone recording in, transcript out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], streaming=False),
    outputs="text",
    # live=True was removed: it hides the Submit button.
)

demo.launch()