salomonsky committed on
Commit
e3f9866
·
verified ·
1 Parent(s): 4660295

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -15
app.py CHANGED
@@ -1,11 +1,9 @@
1
  import streamlit as st
2
- import base64
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
  from audiorecorder import audiorecorder
7
  import speech_recognition as sr
8
- from pydub import AudioSegment
9
 
10
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
11
 
@@ -17,10 +15,16 @@ if "pre_prompt_sent" not in st.session_state:
17
 
18
  def recognize_speech(audio_data, show_messages=True):
19
  recognizer = sr.Recognizer()
20
- audio_recording = sr.AudioFile(audio_data)
 
 
 
 
 
21
 
22
  with audio_recording as source:
23
  audio = recognizer.record(source)
 
24
  try:
25
  audio_text = recognizer.recognize_google(audio, language="es-ES")
26
  if show_messages:
@@ -71,20 +75,15 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
71
  response += response_token.token.text
72
 
73
  response = ' '.join(response.split()).replace('</s>', '')
74
- audio_file = text_to_speech(response, speed=1.3)
75
  return response, audio_file
76
 
77
- def text_to_speech(text, speed=1.3):
78
  tts = gTTS(text=text, lang='es')
79
  audio_fp = io.BytesIO()
80
  tts.write_to_fp(audio_fp)
81
  audio_fp.seek(0)
82
- audio = AudioSegment.from_mp3(audio_fp)
83
- modified_speed_audio = audio.speedup(playback_speed=speed)
84
- modified_audio_fp = io.BytesIO()
85
- modified_speed_audio.export(modified_audio_fp, format="mp3")
86
- modified_audio_fp.seek(0)
87
- return modified_audio_fp
88
 
89
  def audio_play(audio_file):
90
  if audio_file is not None:
@@ -103,10 +102,10 @@ def main():
103
 
104
  audio_bytes = audiorecorder("Hablar ▶️", "Detener 🛑")
105
  if audio_bytes:
106
- audio_bytes = io.BytesIO(audio_bytes)
107
- audio_bytes.seek(0)
108
- audio_text = recognize_speech(audio_bytes)
109
-
110
  if audio_text:
111
  output, audio_file = generate(audio_text, history=st.session_state.history)
112
  display_recognition_result(audio_text, output, audio_file)
 
1
  import streamlit as st
 
2
  import io
3
  from huggingface_hub import InferenceClient
4
  from gtts import gTTS
5
  from audiorecorder import audiorecorder
6
  import speech_recognition as sr
 
7
 
8
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
9
 
 
15
 
16
  def recognize_speech(audio_data, show_messages=True):
17
  recognizer = sr.Recognizer()
18
+
19
+ audio_filename = "temp_audio_file.wav"
20
+ with open(audio_filename, "wb") as f:
21
+ f.write(audio_data.read())
22
+
23
+ audio_recording = sr.AudioFile(audio_filename)
24
 
25
  with audio_recording as source:
26
  audio = recognizer.record(source)
27
+
28
  try:
29
  audio_text = recognizer.recognize_google(audio, language="es-ES")
30
  if show_messages:
 
75
  response += response_token.token.text
76
 
77
  response = ' '.join(response.split()).replace('</s>', '')
78
+ audio_file = text_to_speech(response)
79
  return response, audio_file
80
 
81
+ def text_to_speech(text):
82
  tts = gTTS(text=text, lang='es')
83
  audio_fp = io.BytesIO()
84
  tts.write_to_fp(audio_fp)
85
  audio_fp.seek(0)
86
+ return audio_fp
 
 
 
 
 
87
 
88
  def audio_play(audio_file):
89
  if audio_file is not None:
 
102
 
103
  audio_bytes = audiorecorder("Hablar ▶️", "Detener 🛑")
104
  if audio_bytes:
105
+ audio_data = io.BytesIO(audio_bytes)
106
+ audio_data.seek(0)
107
+ audio_text = recognize_speech(audio_data)
108
+
109
  if audio_text:
110
  output, audio_file = generate(audio_text, history=st.session_state.history)
111
  display_recognition_result(audio_text, output, audio_file)