rrevo committed on
Commit
8d48726
·
1 Parent(s): 56aa84a
Files changed (1) hide show
  1. client/src/main.py +65 -64
client/src/main.py CHANGED
@@ -31,83 +31,84 @@ def main(transcriptions_queue):
31
  energy_threshold = 300
32
 
33
  data_queue = Queue()
 
 
34
 
35
- with get_microphone(sample_rate=sample_rate) as microphone:
36
- print('microphone is:', microphone)
37
  speech_recognizer = get_speech_recognizer(energy_threshold=energy_threshold)
38
  speech_recognizer.adjust_for_ambient_noise(source=microphone)
39
 
40
- def record_callback(_, audio: sr.AudioData) -> None:
41
- data = audio.get_raw_data()
42
- data_queue.put(data)
43
-
44
- speech_recognizer.listen_in_background(source=microphone, callback=record_callback, phrase_time_limit=recording_duration)
45
-
46
- print("\n🎤 Microphone is now listening...\n")
47
-
48
- prev_audio_array = None
49
- current_audio_chunk = AudioChunk(start_time=datetime.now(tz=UTC))
50
-
51
- while True:
52
- try:
53
- now = datetime.now(tz=UTC)
54
- # Pull raw recorded audio from the queue.
55
- if not data_queue.empty():
56
- # Store end time if we're over the recording time limit.
57
- if now - current_audio_chunk.start_time > timedelta(seconds=recording_duration):
58
- current_audio_chunk.end_time = now
59
-
60
- # Get audio data from queue
61
- audio_data = get_all_audio_queue(data_queue)
62
- audio_np_array = to_audio_array(audio_data)
63
-
64
- if current_audio_chunk.is_complete:
65
- print('start serialize')
66
- if prev_audio_array is not None:
67
- serialized = pickle.dumps(
68
- np.concatenate((
69
- prev_audio_array,
70
- current_audio_chunk.audio_array
71
- ))
72
- )
73
- else:
74
- serialized = pickle.dumps(current_audio_chunk.audio_array)
75
- prev_audio_array = current_audio_chunk.audio_array
76
- print('end serialize')
77
-
78
- start = time.time()
79
- print('start req')
80
- response = httpx.post(TRANSCRIBING_SERVER, data=serialized)
81
- transcription = response.json()['transcribe']
82
- print('req done', response.text, response.status_code, time.time() - start)
83
- transcriptions_queue.put(transcription)
84
-
85
- # text = transcribe_model.transcribe(current_audio_chunk.audio_array)
86
- # sentence = Sentence(
87
- # start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
88
- # )
89
- current_audio_chunk = AudioChunk(
90
- audio_array=audio_np_array, start_time=datetime.now(tz=UTC)
91
  )
92
- # print(sentence.text) # noqa: T201
93
  else:
94
- current_audio_chunk.update_array(audio_np_array)
 
 
95
 
96
- # Flush stdout
97
- print("", end="", flush=True) # noqa: T201
 
 
 
 
98
 
99
- # Infinite loops are bad for processors, must sleep.
100
- sleep(0.25)
101
- except KeyboardInterrupt:
102
- current_audio_chunk.end_time = datetime.now(tz=UTC)
103
- if current_audio_chunk.is_complete:
104
- logger.warning("⚠️ Transcribing last chunk...")
105
  # text = transcribe_model.transcribe(current_audio_chunk.audio_array)
106
  # sentence = Sentence(
107
  # start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
108
  # )
 
 
 
109
  # print(sentence.text) # noqa: T201
110
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
 
113
  # for i in range(minimum, maximum + 1):
 
31
  energy_threshold = 300
32
 
33
  data_queue = Queue()
34
+ microphone = get_microphone(sample_rate=sample_rate)
35
+ print('microphone is:', microphone)
36
 
37
+ with microphone:
 
38
  speech_recognizer = get_speech_recognizer(energy_threshold=energy_threshold)
39
  speech_recognizer.adjust_for_ambient_noise(source=microphone)
40
 
41
+ def record_callback(_, audio: sr.AudioData) -> None:
42
+ data = audio.get_raw_data()
43
+ data_queue.put(data)
44
+
45
+ speech_recognizer.listen_in_background(source=microphone, callback=record_callback, phrase_time_limit=recording_duration)
46
+
47
+ print("\n🎤 Microphone is now listening...\n")
48
+
49
+ prev_audio_array = None
50
+ current_audio_chunk = AudioChunk(start_time=datetime.now(tz=UTC))
51
+
52
+ while True:
53
+ try:
54
+ now = datetime.now(tz=UTC)
55
+ # Pull raw recorded audio from the queue.
56
+ if not data_queue.empty():
57
+ # Store end time if we're over the recording time limit.
58
+ if now - current_audio_chunk.start_time > timedelta(seconds=recording_duration):
59
+ current_audio_chunk.end_time = now
60
+
61
+ # Get audio data from queue
62
+ audio_data = get_all_audio_queue(data_queue)
63
+ audio_np_array = to_audio_array(audio_data)
64
+
65
+ if current_audio_chunk.is_complete:
66
+ print('start serialize')
67
+ if prev_audio_array is not None:
68
+ serialized = pickle.dumps(
69
+ np.concatenate((
70
+ prev_audio_array,
71
+ current_audio_chunk.audio_array
72
+ ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  )
 
74
  else:
75
+ serialized = pickle.dumps(current_audio_chunk.audio_array)
76
+ prev_audio_array = current_audio_chunk.audio_array
77
+ print('end serialize')
78
 
79
+ start = time.time()
80
+ print('start req')
81
+ response = httpx.post(TRANSCRIBING_SERVER, data=serialized)
82
+ transcription = response.json()['transcribe']
83
+ print('req done', response.text, response.status_code, time.time() - start)
84
+ transcriptions_queue.put(transcription)
85
 
 
 
 
 
 
 
86
  # text = transcribe_model.transcribe(current_audio_chunk.audio_array)
87
  # sentence = Sentence(
88
  # start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
89
  # )
90
+ current_audio_chunk = AudioChunk(
91
+ audio_array=audio_np_array, start_time=datetime.now(tz=UTC)
92
+ )
93
  # print(sentence.text) # noqa: T201
94
+ else:
95
+ current_audio_chunk.update_array(audio_np_array)
96
+
97
+ # Flush stdout
98
+ print("", end="", flush=True) # noqa: T201
99
+
100
+ # Infinite loops are bad for processors, must sleep.
101
+ sleep(0.25)
102
+ except KeyboardInterrupt:
103
+ current_audio_chunk.end_time = datetime.now(tz=UTC)
104
+ if current_audio_chunk.is_complete:
105
+ logger.warning("⚠️ Transcribing last chunk...")
106
+ # text = transcribe_model.transcribe(current_audio_chunk.audio_array)
107
+ # sentence = Sentence(
108
+ # start_time=current_audio_chunk.start_time, end_time=current_audio_chunk.end_time, text=text
109
+ # )
110
+ # print(sentence.text) # noqa: T201
111
+ break
112
 
113
 
114
  # for i in range(minimum, maximum + 1):