cesar committed on
Commit
f452077
·
verified ·
1 Parent(s): 2c6849a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -1,7 +1,9 @@
 
1
  import os
2
  import io
3
  import gradio as gr
4
  import subprocess
 
5
  from google.cloud import speech
6
  from google.api_core.client_options import ClientOptions
7
 
@@ -15,12 +17,17 @@ except KeyError:
15
  client_options = ClientOptions(api_key=API_KEY)
16
  client = speech.SpeechClient(client_options=client_options)
17
 
 
 
 
 
 
18
  def convert_to_wav(input_file):
19
- """Convierte archivos de audio a formato WAV LINEAR16 si es necesario."""
20
  output_file = input_file + ".wav"
21
  command = [
22
  "ffmpeg", "-y", "-i", input_file,
23
- "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "1", output_file
24
  ]
25
  subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
26
  return output_file
@@ -34,10 +41,17 @@ def transcribe(audio_file=None):
34
  if not audio_file.endswith(".wav"):
35
  audio_file = convert_to_wav(audio_file)
36
 
 
 
 
 
 
 
 
37
  # Configuración de la solicitud
38
  config = speech.RecognitionConfig(
39
  encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
40
- sample_rate_hertz=44100,
41
  audio_channel_count=1,
42
  language_code="es-AR",
43
  )
 
1
+
2
  import os
3
  import io
4
  import gradio as gr
5
  import subprocess
6
+ import wave
7
  from google.cloud import speech
8
  from google.api_core.client_options import ClientOptions
9
 
 
17
  client_options = ClientOptions(api_key=API_KEY)
18
  client = speech.SpeechClient(client_options=client_options)
19
 
20
+ def get_sample_rate(file_path):
21
+ """Obtiene la tasa de muestreo (sample rate) de un archivo de audio."""
22
+ with wave.open(file_path, "rb") as wf:
23
+ return wf.getframerate()
24
+
25
  def convert_to_wav(input_file):
26
+ """Convierte archivos de audio a WAV LINEAR16 con la tasa de muestreo correcta."""
27
  output_file = input_file + ".wav"
28
  command = [
29
  "ffmpeg", "-y", "-i", input_file,
30
+ "-acodec", "pcm_s16le", "-ar", "48000", "-ac", "1", output_file # 48000 Hz para evitar el error
31
  ]
32
  subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
33
  return output_file
 
41
  if not audio_file.endswith(".wav"):
42
  audio_file = convert_to_wav(audio_file)
43
 
44
+ # Verificar el tamaño del archivo (máximo 10MB)
45
+ if os.path.getsize(audio_file) > 10 * 1024 * 1024:
46
+ return "Error: El archivo de audio supera los 10MB. Usa un archivo más pequeño.", ""
47
+
48
+ # Obtener la tasa de muestreo real del archivo convertido
49
+ sample_rate = get_sample_rate(audio_file)
50
+
51
  # Configuración de la solicitud
52
  config = speech.RecognitionConfig(
53
  encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
54
+ sample_rate_hertz=sample_rate, # Usamos la tasa de muestreo detectada automáticamente
55
  audio_channel_count=1,
56
  language_code="es-AR",
57
  )