Spaces:
Running
Running
Update app.py
Browse fileslist instead of queue for audio output chunk file mgt
app.py
CHANGED
@@ -11,7 +11,8 @@ from time import sleep
|
|
11 |
import audioread
|
12 |
import queue
|
13 |
import threading
|
14 |
-
from
|
|
|
15 |
|
16 |
load_dotenv(override=True)
|
17 |
key = os.getenv('OPENAI_API_KEY')
|
@@ -30,13 +31,10 @@ else:
|
|
30 |
dp.mkdir(exist_ok=True)
|
31 |
dataDir = '/data/'
|
32 |
|
33 |
-
|
34 |
|
35 |
client = OpenAI(api_key = key)
|
36 |
|
37 |
-
qspeech = queue.Queue()
|
38 |
-
qdelete = queue.Queue()
|
39 |
-
|
40 |
#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
|
41 |
|
42 |
# Abbreviation -> spoken-form replacements; initial_audio_output() applies
# each pair with str.replace() before the text is sent to speech synthesis.
# (The 'Mr. ' entry used to be listed twice; the duplicate key was redundant.)
abbrevs = {'St. ': 'Saint ', 'Mr. ': 'mister ', 'Mrs. ': 'mussus ', 'Ms. ': 'mizz '}
|
@@ -132,12 +130,9 @@ def genUsageStats(do_reset=False):
|
|
132 |
result.append(['totals', f'{ttotal4mini_in}/{ttotal4mini_out}', f'{ttotal4o_in}/{ttotal4o_out}', f'audio:{totalAudio}',f'speech:{totalSpeech}'])
|
133 |
return result
|
134 |
|
135 |
-
def
|
136 |
-
|
137 |
-
|
138 |
-
if os.path.exists(fname):
|
139 |
-
os.remove(fname)
|
140 |
-
return [None, [], None]
|
141 |
|
142 |
def updatePassword(txt):
    """Normalize a typed password and produce a mask for the textbox.

    Args:
        txt: raw text taken from the password textbox.

    Returns:
        A two-item list: the lower-cased, whitespace-stripped password and
        a fixed string of asterisks that replaces the visible textbox text.
    """
    normalized = txt.lower().strip()
    mask = "*********"
    return [normalized, mask]
|
@@ -159,6 +154,11 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
|
|
159 |
if prompt.startswith('gpt4'):
|
160 |
gptModel = 'gpt-4o'
|
161 |
prompt = prompt[5:]
|
|
|
|
|
|
|
|
|
|
|
162 |
if user_window in unames and pwd_window in pwdList:
|
163 |
past.append({"role":"user", "content":prompt})
|
164 |
completion = client.chat.completions.create(model=gptModel,
|
@@ -226,32 +226,68 @@ def pause_message():
|
|
226 |
|
227 |
def set_speak_button(txt):
|
228 |
vis = False
|
229 |
-
if len(txt) >
|
230 |
vis = True
|
231 |
return gr.Button(visible=vis)
|
232 |
|
233 |
-
def
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
|
|
238 |
|
239 |
-
def speech_worker(chunks=[]):
|
240 |
for chunk in chunks:
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
def initial_audio_output(txt, user):
|
251 |
global digits
|
252 |
global abbrevs
|
253 |
-
|
254 |
-
|
|
|
255 |
for s,x in abbrevs.items():
|
256 |
txt = txt.replace(s, x)
|
257 |
words_in = txt.replace('**', '').splitlines(False)
|
@@ -310,25 +346,25 @@ with gr.Blocks() as demo:
|
|
310 |
chunk = chunklist[0]
|
311 |
if chunk.strip() == '':
|
312 |
return gr.Audio(sources=None)
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
|
|
|
|
321 |
|
322 |
-
def gen_output_audio():
|
323 |
try:
|
324 |
-
fname =
|
325 |
except:
|
326 |
-
return gr.Audio(sources=None)
|
327 |
-
return fname
|
|
|
328 |
|
329 |
-
history = gr.State([])
|
330 |
-
password = gr.State("")
|
331 |
-
model = gr.State("gpt-4o-mini")
|
332 |
gr.Markdown('# GPT Chat')
|
333 |
gr.Markdown('Enter user name & password then enter prompt and click submit button. Restart conversation if topic changes')
|
334 |
gr.Markdown('You can enter prompts by voice. Tap Record, speak, then tap Stop.' +
|
@@ -336,6 +372,7 @@ with gr.Blocks() as demo:
|
|
336 |
# heading = gr.Label(value="GPT Chat", scale=2, color="Crimson" )
|
337 |
with gr.Row():
|
338 |
user_window = gr.Textbox(label = "User Name")
|
|
|
339 |
pwd_window = gr.Textbox(label = "Password")
|
340 |
pwd_window.blur(updatePassword, pwd_window, [password, pwd_window])
|
341 |
with gr.Row():
|
@@ -350,16 +387,16 @@ with gr.Blocks() as demo:
|
|
350 |
speak_output = gr.Button(value="Speak Dialog", visible=False)
|
351 |
prompt_window = gr.Textbox(label = "Prompt or Question")
|
352 |
output_window = gr.Textbox(label = "Dialog")
|
353 |
-
submit_button.click(chat, inputs=[prompt_window,
|
354 |
outputs=[history, output_window, prompt_window, model])
|
355 |
-
clear_button.click(
|
356 |
-
audio_widget.stop_recording(fn=transcribe, inputs=[
|
357 |
outputs=[prompt_window])
|
358 |
audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
|
359 |
reset_button.add(audio_widget)
|
360 |
audio_out = gr.Audio(autoplay=True, visible=False)
|
361 |
-
audio_out.stop(fn=gen_output_audio, inputs=
|
362 |
-
speak_output.click(fn=initial_audio_output, inputs=[output_window,
|
363 |
output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
|
364 |
-
demo.unload(
|
365 |
demo.launch(share=True)
|
|
|
11 |
import audioread
|
12 |
import queue
|
13 |
import threading
|
14 |
+
from glob import glob
|
15 |
+
import copy
|
16 |
|
17 |
load_dotenv(override=True)
|
18 |
key = os.getenv('OPENAI_API_KEY')
|
|
|
31 |
dp.mkdir(exist_ok=True)
|
32 |
dataDir = '/data/'
|
33 |
|
34 |
+
speak_file = dataDir + "speek.wav"
|
35 |
|
36 |
client = OpenAI(api_key = key)
|
37 |
|
|
|
|
|
|
|
38 |
#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
|
39 |
|
40 |
# Abbreviation -> spoken-form replacements; initial_audio_output() applies
# each pair with str.replace() before the text is sent to speech synthesis.
# (The 'Mr. ' entry used to be listed twice; the duplicate key was redundant.)
abbrevs = {'St. ': 'Saint ', 'Mr. ': 'mister ', 'Mrs. ': 'mussus ', 'Ms. ': 'mizz '}
|
|
|
130 |
result.append(['totals', f'{ttotal4mini_in}/{ttotal4mini_out}', f'{ttotal4o_in}/{ttotal4o_out}', f'audio:{totalAudio}',f'speech:{totalSpeech}'])
|
131 |
return result
|
132 |
|
133 |
+
def new_conversation(user):
    """Reset the conversation for `user`.

    Deletes the user's generated speech files through clean_up() and
    returns the values used to reset the UI components.

    NOTE(review): four values are returned, while the clear_button.click
    wiring lists three outputs (prompt_window, history, output_window) --
    confirm whether the trailing [] was meant to reset the `q` state.
    """
    clean_up(user)
    reset_values = [None, [], None, []]
    return reset_values
|
|
|
|
|
|
|
136 |
|
137 |
def updatePassword(txt):
    """Normalize a typed password and produce a mask for the textbox.

    Args:
        txt: raw text taken from the password textbox.

    Returns:
        A two-item list: the lower-cased, whitespace-stripped password and
        a fixed string of asterisks that replaces the visible textbox text.
    """
    normalized = txt.lower().strip()
    mask = "*********"
    return [normalized, mask]
|
|
|
154 |
if prompt.startswith('gpt4'):
|
155 |
gptModel = 'gpt-4o'
|
156 |
prompt = prompt[5:]
|
157 |
+
if prompt.startswith("clean"):
|
158 |
+
user = prompt[6:]
|
159 |
+
response = f'cleaned all .wav files for {user}'
|
160 |
+
final_clean_up(user)
|
161 |
+
return [past, response, None, gptModel]
|
162 |
if user_window in unames and pwd_window in pwdList:
|
163 |
past.append({"role":"user", "content":prompt})
|
164 |
completion = client.chat.completions.create(model=gptModel,
|
|
|
226 |
|
227 |
def set_speak_button(txt):
    """Show the "Speak Dialog" button only when the dialog has text to speak.

    Args:
        txt: current content of the dialog textbox.

    Returns:
        A gr.Button update whose `visible` flag is True when `txt` holds
        more than two characters, False otherwise.
    """
    # new_conversation() resets the dialog textbox to None; guard so that a
    # None value hides the button instead of raising TypeError in len().
    # NOTE(review): confirm whether Gradio delivers None or '' in that case.
    has_text = txt is not None and len(txt) > 2
    return gr.Button(visible=has_text)
|
232 |
|
233 |
+
def update_user(txt):
    """Normalize the user name typed into the "User Name" textbox.

    The result is stored in the `user` state, which is later used both for
    the login check and as a prefix for per-user speech file names, so it
    is lower-cased and stripped of surrounding whitespace (the same
    normalization updatePassword() applies to the password).

    Args:
        txt: raw text from the user-name textbox.

    Returns:
        The lower-cased, whitespace-stripped user name.
    """
    return txt.lower().strip()
|
235 |
+
|
236 |
+
# def clean_up(q):
|
237 |
+
# for fname in q:
|
238 |
+
# if os.path.exists(fname):
|
239 |
+
# os.remove(fname)
|
240 |
+
# return []
|
241 |
|
242 |
+
def speech_worker(chunks=None, q=None):
    """Synthesize speech for each text chunk and write it to its file path.

    Intended to run in a background thread: chunk i is converted with the
    OpenAI text-to-speech endpoint and saved to the i-th path in `q`.

    Args:
        chunks: text chunks to convert; None or empty means nothing to do.
        q: output file paths, one per chunk in the same order. The list is
            read by index and is not modified.

    Raises:
        IndexError: if `q` holds fewer paths than there are chunks.
    """
    # None defaults replace the former mutable [] defaults.
    chunks = [] if chunks is None else chunks
    paths = [] if q is None else q
    for idx, chunk in enumerate(chunks):
        # Index instead of pop(0) so the caller's list is left untouched.
        fpath = paths[idx]
        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85, response_format='wav')
        with open(fpath, 'wb') as fp:
            fp.write(response.content)
|
248 |
+
|
249 |
+
# def gen_speech_file(user):
|
250 |
+
# global speech_file_index
|
251 |
+
# speech_file_index += 1
|
252 |
+
# fname = dataDir + f'user_speech{speech_file_index}.wav'
|
253 |
+
# return fname
|
254 |
+
|
255 |
+
def gen_speech_file_names(user, cnt):
    """Build the speech file paths for one user's audio chunks.

    Args:
        user: user name used as the file-name prefix.
        cnt: number of paths to generate.

    Returns:
        A list of `cnt` paths of the form
        dataDir + '<user>_speech<i>.wav' for i = 0 .. cnt-1
        (empty when cnt is zero or negative).
    """
    return [dataDir + f'{user}_speech{idx}.wav' for idx in range(cnt)]
|
260 |
|
261 |
+
def final_clean_up(user):
    """Delete every generated speech .wav file belonging to `user`.

    Removal is best effort: a file that cannot be deleted is skipped.

    Args:
        user: user name; files matching dataDir + '<user>_speech*.wav'
            are removed.

    NOTE(review): `user` is interpolated into the glob pattern unescaped
    and chat() passes text taken straight from the prompt, so wildcard
    characters in it widen the match -- confirm this is intended.
    """
    for fpath in glob(dataDir + f'{user}_speech*.wav'):
        try:
            os.remove(fpath)
        except OSError:
            # Best effort: the file may already be gone or not removable.
            # Only OS-level failures are ignored; anything else propagates.
            continue
|
268 |
|
269 |
with gr.Blocks() as demo:
|
270 |
+
history = gr.State([])
|
271 |
+
password = gr.State("")
|
272 |
+
user = gr.State("")
|
273 |
+
model = gr.State("gpt-4o-mini")
|
274 |
+
q = gr.State([])
|
275 |
+
qsave = gr.State([])
|
276 |
+
|
277 |
+
def clean_up(user):
    """Delete the generated speech .wav files belonging to `user`.

    This used to be a line-for-line copy of final_clean_up(); it now
    delegates to it so the deletion logic lives in one place.

    Args:
        user: user name whose '<user>_speech*.wav' files are removed.
    """
    final_clean_up(user)
|
284 |
+
|
285 |
def initial_audio_output(txt, user):
|
286 |
global digits
|
287 |
global abbrevs
|
288 |
+
q = []
|
289 |
+
if len(txt.strip()) < 5:
|
290 |
+
return ['None', q]
|
291 |
for s,x in abbrevs.items():
|
292 |
txt = txt.replace(s, x)
|
293 |
words_in = txt.replace('**', '').splitlines(False)
|
|
|
346 |
chunk = chunklist[0]
|
347 |
if chunk.strip() == '':
|
348 |
return gr.Audio(sources=None)
|
349 |
+
fname_list = gen_speech_file_names(user, len(chunklist))
|
350 |
+
q = fname_list.copy()
|
351 |
+
qsave = fname_list.copy()
|
352 |
+
fname = q.pop(0)
|
353 |
+
if len(chunklist) > 0:
|
354 |
+
threading.Thread(target=speech_worker, daemon=True, args=(chunklist[1:],fname_list[1:])).start()
|
355 |
+
response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85, response_format='wav')
|
356 |
+
with open(fname, 'wb') as fp:
|
357 |
+
fp.write(response.content)
|
358 |
+
return [fname, q]
|
359 |
|
360 |
+
def gen_output_audio(q):
    """Hand the next queued speech file to the audio player.

    Wired to audio_out.stop with outputs [audio_out, q]: the first item
    becomes the audio component's new value, the second the new queue
    state.

    Args:
        q: list of pending speech file paths (the `q` state).

    Returns:
        [next_path, remaining_paths] while files are pending, otherwise
        [None, []].
    """
    # The empty case previously returned a gr.Audio component in the slot
    # that feeds the `q` state; return an empty list there instead. The
    # isinstance check also recovers from a non-list value left in state.
    if not isinstance(q, list) or not q:
        return [None, []]
    fname = q.pop(0)
    return [fname, q]
|
366 |
+
|
367 |
|
|
|
|
|
|
|
368 |
gr.Markdown('# GPT Chat')
|
369 |
gr.Markdown('Enter user name & password then enter prompt and click submit button. Restart conversation if topic changes')
|
370 |
gr.Markdown('You can enter prompts by voice. Tap Record, speak, then tap Stop.' +
|
|
|
372 |
# heading = gr.Label(value="GPT Chat", scale=2, color="Crimson" )
|
373 |
with gr.Row():
|
374 |
user_window = gr.Textbox(label = "User Name")
|
375 |
+
user_window.blur(update_user, user_window, user)
|
376 |
pwd_window = gr.Textbox(label = "Password")
|
377 |
pwd_window.blur(updatePassword, pwd_window, [password, pwd_window])
|
378 |
with gr.Row():
|
|
|
387 |
speak_output = gr.Button(value="Speak Dialog", visible=False)
|
388 |
prompt_window = gr.Textbox(label = "Prompt or Question")
|
389 |
output_window = gr.Textbox(label = "Dialog")
|
390 |
+
submit_button.click(chat, inputs=[prompt_window, user, password, history, output_window, model],
|
391 |
outputs=[history, output_window, prompt_window, model])
|
392 |
+
clear_button.click(fn=new_conversation, inputs=user, outputs=[prompt_window, history, output_window])
|
393 |
+
audio_widget.stop_recording(fn=transcribe, inputs=[user, password, audio_widget],
|
394 |
outputs=[prompt_window])
|
395 |
audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
|
396 |
reset_button.add(audio_widget)
|
397 |
audio_out = gr.Audio(autoplay=True, visible=False)
|
398 |
+
audio_out.stop(fn=gen_output_audio, inputs=q, outputs = [audio_out, q])
|
399 |
+
speak_output.click(fn=initial_audio_output, inputs=[output_window, user], outputs=[audio_out, q])
|
400 |
output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
|
401 |
+
demo.unload(final_clean_up(user))
|
402 |
demo.launch(share=True)
|