salomonsky committed on
Commit
188e3eb
·
verified ·
1 Parent(s): 68c7498

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -9,7 +9,8 @@ from huggingface_hub import InferenceClient
9
  from streamlit_mic_recorder import mic_recorder
10
  import wave
11
  import webrtcvad
12
- import sounddevice as sd
 
13
 
14
  if "history" not in st.session_state:
15
  st.session_state.history = []
@@ -104,17 +105,12 @@ def display_recognition_result(audio_text, output, audio_file):
104
  def voice_activity_detection(audio_data):
105
  return vad.is_speech(audio_data, sample_rate)
106
 
107
- def audio_callback(indata, frames, time, status):
108
- assert frames == block_size
109
- audio_data = indata[::downsample, mapping]
110
- audio_data = map(lambda x: (x + 1) / 2, audio_data)
111
- audio_data = np.fromiter(audio_data, np.float16)
112
- audio_data = audio_data.tobytes()
113
- detection = voice_activity_detection(audio_data)
114
- print(detection)
115
-
116
  def start_stream():
117
- stream.start()
 
 
 
 
118
 
119
  class Threader(threading.Thread):
120
  def __init__(self, *args, **kwargs):
@@ -139,7 +135,7 @@ if __name__ == "__main__":
139
  sample_rate = audio["sample_rate"]
140
  num_channels = 1
141
 
142
- with wave.open(temp_audio_file_path, 'w') as wave_file:
143
  wave_file.setnchannels(num_channels)
144
  wave_file.setsampwidth(sample_width)
145
  wave_file.setframerate(sample_rate)
@@ -149,11 +145,10 @@ if __name__ == "__main__":
149
 
150
  channels = [1]
151
  mapping = [c - 1 for c in channels]
152
- device_info = sd.query_devices(16, 'input')
153
- sample_rate = int(device_info['default_samplerate'])
154
  interval_size = 10
155
  downsample = 1
156
  block_size = int(sample_rate * interval_size / 1000)
157
 
158
  Threader(name='mythread')
159
- st.button("Detener Stream")
 
9
  from streamlit_mic_recorder import mic_recorder
10
  import wave
11
  import webrtcvad
12
+
13
+ temp_audio_file_path = "./output.wav"
14
 
15
  if "history" not in st.session_state:
16
  st.session_state.history = []
 
105
  def voice_activity_detection(audio_data):
106
  return vad.is_speech(audio_data, sample_rate)
107
 
 
 
 
 
 
 
 
 
 
108
  def start_stream():
109
+ with wave.open(temp_audio_file_path, 'rb') as wave_file:
110
+ frames = wave_file.readframes(wave_file.getnframes())
111
+ audio_data = np.frombuffer(frames, dtype=np.int16)
112
+ detection = voice_activity_detection(audio_data)
113
+ print(detection)
114
 
115
  class Threader(threading.Thread):
116
  def __init__(self, *args, **kwargs):
 
135
  sample_rate = audio["sample_rate"]
136
  num_channels = 1
137
 
138
+ with wave.open(temp_audio_file_path, 'wb') as wave_file:
139
  wave_file.setnchannels(num_channels)
140
  wave_file.setsampwidth(sample_width)
141
  wave_file.setframerate(sample_rate)
 
145
 
146
  channels = [1]
147
  mapping = [c - 1 for c in channels]
148
+ sample_rate = int(sr.AudioFile(temp_audio_file_path).samplerate)
 
149
  interval_size = 10
150
  downsample = 1
151
  block_size = int(sample_rate * interval_size / 1000)
152
 
153
  Threader(name='mythread')
154
+ st.button("Detener Stream")