Spaces:
Build error
Build error
File size: 3,999 Bytes
b9fb710 bc8d394 190e978 1ba4a0c f797e13 6e090f6 f797e13 190e978 1ba4a0c f797e13 376e42a f797e13 61a2dd5 f797e13 b9fb710 f797e13 71f6464 b9fb710 4f40fa8 b9fb710 4f40fa8 b9fb710 71f6464 1ba4a0c bc8d394 71f6464 1ba4a0c bc8d394 1ba4a0c f797e13 db56cf5 f797e13 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import numpy as np
import time
import os
import httpx
from queue import Queue
import logging
from datetime import UTC, datetime, timedelta
from time import sleep
import pickle
import speech_recognition as sr
from audio_utils import get_microphone, get_speech_recognizer, get_all_audio_queue, to_audio_array, AudioChunk
logger = logging.getLogger(__name__)
TRANSCRIBING_SERVER = os.getenv('TRANSCRIBING_SERVER', "http://localhost:3535/transcribe")
def main():
recording_duration = 1
sample_rate = 16000
energy_threshold = 300
data_queue = Queue()
microphone = get_microphone(sample_rate=sample_rate)
speech_recognizer = get_speech_recognizer(energy_threshold=energy_threshold)
with microphone:
speech_recognizer.adjust_for_ambient_noise(source=microphone)
def record_callback(_, audio: sr.AudioData) -> None:
data = audio.get_raw_data()
data_queue.put(data)
speech_recognizer.listen_in_background(source=microphone, callback=record_callback, phrase_time_limit=recording_duration)
print("\n🎤 Microphone is now listening...\n")
prev_audio_array = None
current_audio_chunk = AudioChunk(start_time=datetime.now(tz=UTC))
while True:
try:
now = datetime.now(tz=UTC)
# Pull raw recorded audio from the queue.
if not data_queue.empty():
# Store end time if we're over the recording time limit.
if now - current_audio_chunk.start_time > timedelta(seconds=recording_duration):
current_audio_chunk.end_time = now
# Get audio data from queue
audio_data = get_all_audio_queue(data_queue)
audio_np_array = to_audio_array(audio_data)
if current_audio_chunk.is_complete:
print('start serialize')
if prev_audio_array:
serialized = pickle.dumps(
np.concatenate((
prev_audio_array,
current_audio_chunk.audio_array
))
)
else:
serialized = pickle.dumps(current_audio_chunk.audio_array)
prev_audio_array = current_audio_chunk.audio_array
print('end serialize')
start = time.time()
print('start req')
response = httpx.post(TRANSCRIBING_SERVER, data=serialized)
print('req done', response.text, response.status_code, time.time() - start)
# text = transcribe_model.transcribe(current_audio_chunk.audio_array)
# sentence = Sentence(
# start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
# )
current_audio_chunk = AudioChunk(
audio_array=audio_np_array, start_time=datetime.now(tz=UTC)
)
# print(sentence.text) # noqa: T201
else:
current_audio_chunk.update_array(audio_np_array)
# Flush stdout
print("", end="", flush=True) # noqa: T201
# Infinite loops are bad for processors, must sleep.
sleep(0.25)
except KeyboardInterrupt:
current_audio_chunk.end_time = datetime.now(tz=UTC)
if current_audio_chunk.is_complete:
logger.warning("⚠️ Transcribing last chunk...")
# text = transcribe_model.transcribe(current_audio_chunk.audio_array)
# sentence = Sentence(
# start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
# )
# print(sentence.text) # noqa: T201
break
if __name__ == '__main__':
main()
|