# NOTE(review): removed web-scrape artifacts (blame hashes, line-number gutter,
# viewer labels) that were not part of the original source file.
import threading
from starlette.applications import Starlette
from starlette.routing import Route
from sse_starlette.sse import EventSourceResponse
import uvicorn
import asyncio
import numpy as np
import time
import os
import httpx
from queue import Queue
import logging
from datetime import UTC, datetime, timedelta
from time import sleep
import pickle
import speech_recognition as sr
from audio_utils import get_microphone, get_speech_recognizer, get_all_audio_queue, to_audio_array, AudioChunk
from starlette.middleware.cors import CORSMiddleware
# Module-level logger; output depends on whatever logging config the host app installs.
logger = logging.getLogger(__name__)
# Endpoint of the remote transcription HTTP service; overridable via the
# TRANSCRIBING_SERVER environment variable.
TRANSCRIBING_SERVER = os.getenv('TRANSCRIBING_SERVER', "http://localhost:3535/transcribe")
def main(transcriptions_queue):
    """Capture microphone audio in ~1 s chunks and ship them to the remote
    transcription service.

    Runs an infinite capture loop; each transcription string returned by the
    service is pushed onto ``transcriptions_queue`` for the SSE server to
    stream to clients.

    Args:
        transcriptions_queue: queue.Queue that receives transcription strings.
    """
    recording_duration = 1      # seconds of audio per chunk
    sample_rate = 16000         # Hz, expected by the transcription model
    energy_threshold = 300      # recognizer sensitivity for detecting speech
    data_queue = Queue()        # raw bytes pushed by the background recorder

    microphone = get_microphone(sample_rate=sample_rate)
    speech_recognizer = get_speech_recognizer(energy_threshold=energy_threshold)
    with microphone:
        # Calibrate once so steady background noise is not treated as speech.
        speech_recognizer.adjust_for_ambient_noise(source=microphone)

    def record_callback(_, audio: sr.AudioData) -> None:
        # Runs on the recognizer's background thread; keep it minimal and
        # thread-safe (Queue.put is).
        data_queue.put(audio.get_raw_data())

    speech_recognizer.listen_in_background(
        source=microphone,
        callback=record_callback,
        phrase_time_limit=recording_duration,
    )
    print("\n🎤 Microphone is now listening...\n")

    # Previous chunk's samples are prepended to each request so the
    # transcriber gets context across chunk boundaries.
    prev_audio_array = None
    current_audio_chunk = AudioChunk(start_time=datetime.now(tz=UTC))
    while True:
        try:
            now = datetime.now(tz=UTC)
            if not data_queue.empty():
                # Mark the chunk finished once it spans the recording window.
                if now - current_audio_chunk.start_time > timedelta(seconds=recording_duration):
                    current_audio_chunk.end_time = now
                audio_np_array = to_audio_array(get_all_audio_queue(data_queue))
                if current_audio_chunk.is_complete:
                    logger.debug('serializing chunk')
                    if prev_audio_array is not None:
                        payload = np.concatenate((prev_audio_array, current_audio_chunk.audio_array))
                    else:
                        payload = current_audio_chunk.audio_array
                    serialized = pickle.dumps(payload)
                    prev_audio_array = current_audio_chunk.audio_array
                    start = time.time()
                    try:
                        # `content=` is httpx's parameter for a raw byte body
                        # (`data=` is deprecated for bytes payloads).
                        response = httpx.post(TRANSCRIBING_SERVER, content=serialized)
                        response.raise_for_status()
                        transcription = response.json()['transcribe']
                    except (httpx.HTTPError, KeyError, ValueError) as exc:
                        # One failed request must not kill the capture loop;
                        # drop this chunk's transcription and keep recording.
                        logger.warning("transcription request failed: %s", exc)
                    else:
                        print('req done', response.text, response.status_code, time.time() - start)
                        transcriptions_queue.put(transcription)
                    # Start the next chunk with the audio we just pulled.
                    current_audio_chunk = AudioChunk(
                        audio_array=audio_np_array, start_time=datetime.now(tz=UTC)
                    )
                else:
                    current_audio_chunk.update_array(audio_np_array)
                # Flush stdout so progress output appears promptly.
                print("", end="", flush=True)  # noqa: T201
            # Infinite loops are bad for processors, must sleep.
            sleep(0.25)
        except KeyboardInterrupt:
            # NOTE(review): KeyboardInterrupt is only delivered to the main
            # thread; when main() runs in a worker thread (as in __main__
            # below) this branch never fires — confirm the intended
            # shutdown path.
            current_audio_chunk.end_time = datetime.now(tz=UTC)
            if current_audio_chunk.is_complete:
                logger.warning("⚠️ Transcribing last chunk...")
            break
# for i in range(minimum, maximum + 1):
# await asyncio.sleep(0.9)
# yield dict(data=i)
async def sse(request):
    """SSE endpoint that streams transcription strings to the client.

    Reads from ``app.state.transcriptions_queue`` (set by ``server()``)
    instead of a module global that only exists under ``__main__``.
    """
    transcriptions = request.app.state.transcriptions_queue

    async def event_publisher():
        try:
            while True:
                # Queue.get() would block the whole event loop (it is a
                # synchronous queue); poll non-blockingly and yield to the
                # loop between polls instead.
                try:
                    text = transcriptions.get_nowait()
                except Empty:
                    await asyncio.sleep(0.2)
                    continue
                yield dict(data=text)
                await asyncio.sleep(0.2)
        except asyncio.CancelledError:
            print(f"Disconnected from client (via refresh/close) {request.client}")
            # Re-raise so cancellation propagates correctly to the server.
            raise

    return EventSourceResponse(event_publisher())
def test(request):
    """Health-check endpoint for '/'.

    Starlette endpoints must return a Response object; the original returned
    a bare str, which fails at request time when Starlette tries to call it
    as an ASGI application.
    """
    return PlainTextResponse("hello world")
# Route table: "/" is a plain health check, "/test" streams SSE events.
routes = [
    Route("/", endpoint=test),
    Route("/test", endpoint=sse),
]
app = Starlette(debug=True, routes=routes)
# Wide-open CORS so a browser frontend served from any origin can subscribe.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
def server(transcriptions_queue):
    """Attach the transcription queue to the app and serve it over HTTP.

    Blocks until the uvicorn server exits.
    """
    # Stash the queue on app.state so request handlers can reach it.
    app.state.transcriptions_queue = transcriptions_queue
    uvicorn.run(app, host="0.0.0.0", port=8343, log_level="info")
if __name__ == '__main__':
    # Shared queue: the capture loop produces transcriptions, the SSE
    # server consumes them.
    transcriptions_queue = Queue()
    workers = [
        threading.Thread(target=main, args=(transcriptions_queue,)),
        threading.Thread(target=server, args=(transcriptions_queue,)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()