fcernafukuzaki committed on
Commit
d9d94b1
·
verified ·
1 Parent(s): 60ff89a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -79
app.py CHANGED
@@ -1,96 +1,51 @@
 
 
1
  import gradio as gr
2
- from transformers import pipeline
3
- import numpy as np
4
- #from google.cloud import speech_v1
5
  from google.cloud import speech
6
- from google.protobuf import timestamp_pb2
7
 
8
- import io
9
- import os
10
# Debug aid: print every entry of the working directory, then the directory
# path itself.
archivos = os.listdir()
print("\n".join(archivos))
print(os.getcwd())

# The Google client libraries read the credentials file location from this
# environment variable; the JSON file is looked up in the working directory.
rutas = [os.getcwd(), "deploygpt-e9475e7c2c7c.json"]
ruta_credenciales = '/'.join(rutas)
print(ruta_credenciales)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ruta_credenciales

# Speech-recognition pipeline built once at import time and shared by callers.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
)
20
 
21
def transcribe(audio_bytes):
    """Transcribe a recorded clip with the module-level ``transcriber`` pipeline.

    Args:
        audio_bytes: A ``(sampling_rate, samples)`` pair. ``samples`` must
            provide ``astype`` (presumably a NumPy array as delivered by
            Gradio's numpy audio mode -- confirm against the caller).
            ``None`` is treated as "nothing was recorded".

    Returns:
        The ``"text"`` entry of the pipeline result, or ``""`` when there is
        no audio to transcribe.
    """
    if audio_bytes is None:
        return ""

    sr, y = audio_bytes
    y = y.astype(np.float32)
    if y.size == 0:
        # np.max raises on an empty array, and there is nothing to transcribe.
        return ""

    # Peak-normalise the samples. An all-zero (silent) clip is left untouched,
    # because dividing by a zero peak would fill the buffer with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
30
-
31
def transcribe_2(audio_bytes):
    """Transcribe a recorded audio file with Google Cloud Speech-to-Text.

    Args:
        audio_bytes: Despite the name, this is used as a file path: the file
            is opened in binary mode and its bytes are sent for recognition.
            A falsy value (``None`` or ``""``) means nothing was recorded.

    Returns:
        A list holding the first alternative's transcript for each result in
        the response, in response order. The list is empty when no file was
        given.
    """
    # Without a recording there is no file to open; answer with an empty
    # result instead of failing inside open().
    if not audio_bytes:
        return []

    # Create the Speech-to-Text client.
    client = speech.SpeechClient()

    # Build the recognition request settings.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Load the whole recording; the request carries the raw bytes inline.
    with open(audio_bytes, "rb") as audio_file:
        audio = speech.RecognitionAudio(content=audio_file.read())

    # Send the request to Google to transcribe the audio.
    response = client.recognize(request={"config": config, "audio": audio})

    # Collect the first alternative of every result, logging each one.
    transcript = []
    for result in response.results:
        text = result.alternatives[0].transcript
        print("Transcript: {}".format(text))
        transcript.append(text)

    return transcript
87
 
88
 
# Microphone-only audio input; "filepath" hands the callback a path to the
# recorded file rather than the samples themselves.
entrada_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    streaming=False,
)

# live=True is deliberately not set: with it the Submit button is not shown.
demo = gr.Interface(
    fn=transcribe_2,
    inputs=entrada_audio,
    outputs="text",
)

demo.launch()
 
1
+ import io
2
+ import os
3
  import gradio as gr
 
 
 
4
  from google.cloud import speech
 
5
 
 
 
 
 
 
 
6
 
# The Google client libraries read the credentials file location from this
# environment variable; the JSON file is looked up in the working directory.
rutas = [os.getcwd(), "deploygpt-e9475e7c2c7c.json"]
ruta_credenciales = '/'.join(rutas)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ruta_credenciales
9
 
 
10
 
11
def transcribe(file_name):
    """Transcribe a recorded audio file with Google Cloud Speech-to-Text.

    Args:
        file_name: Path of the audio file to send for recognition. A falsy
            value (``None`` or ``""``) means nothing was recorded.

    Returns:
        The first alternative's transcript of every result, joined with
        single spaces, or ``''`` when no file was given.
    """
    # Guard clause: only a missing recording short-circuits to an empty
    # string; a real path must fall through to the recognition request.
    if not file_name:
        return ''

    # Create the Speech-to-Text client.
    client = speech.SpeechClient()

    # Build the recognition request settings.
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        enable_automatic_punctuation=True,
        audio_channel_count=1,
        language_code="es-AR",
    )

    # Load the whole recording; the request carries the raw bytes inline.
    with open(file_name, "rb") as audio_file:
        audio = speech.RecognitionAudio(content=audio_file.read())

    # Run the transcription.
    response = client.recognize(request={"config": config, "audio": audio})

    # Collect the first alternative of every result, logging each one.
    transcript = []
    for result in response.results:
        text = result.alternatives[0].transcript
        print("Transcript: {}".format(text))
        transcript.append(text)

    return ' '.join(transcript)
41
 
42
 
# Microphone-only audio input. With type="filepath" the recording is written
# to a temporary WAV file and the callback receives its path.
entrada_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    streaming=False,
)

demo = gr.Interface(
    fn=transcribe,
    inputs=entrada_audio,
    outputs="text",
)

demo.launch()