Spaces:
Running
Running
Update app.py
Browse files
list instead of queue for audio output chunk file mgt
app.py
CHANGED
|
@@ -11,7 +11,8 @@ from time import sleep
|
|
| 11 |
import audioread
|
| 12 |
import queue
|
| 13 |
import threading
|
| 14 |
-
from
|
|
|
|
| 15 |
|
| 16 |
load_dotenv(override=True)
|
| 17 |
key = os.getenv('OPENAI_API_KEY')
|
|
@@ -30,13 +31,10 @@ else:
|
|
| 30 |
dp.mkdir(exist_ok=True)
|
| 31 |
dataDir = '/data/'
|
| 32 |
|
| 33 |
-
|
| 34 |
|
| 35 |
client = OpenAI(api_key = key)
|
| 36 |
|
| 37 |
-
qspeech = queue.Queue()
|
| 38 |
-
qdelete = queue.Queue()
|
| 39 |
-
|
| 40 |
#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
|
| 41 |
|
| 42 |
abbrevs = {'St. ' : 'Saint ', 'Mr. ': 'mister ', 'Mrs. ':'mussus ', 'Mr. ':'mister ', 'Ms. ':'mizz '}
|
|
@@ -132,12 +130,9 @@ def genUsageStats(do_reset=False):
|
|
| 132 |
result.append(['totals', f'{ttotal4mini_in}/{ttotal4mini_out}', f'{ttotal4o_in}/{ttotal4o_out}', f'audio:{totalAudio}',f'speech:{totalSpeech}'])
|
| 133 |
return result
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
if os.path.exists(fname):
|
| 139 |
-
os.remove(fname)
|
| 140 |
-
return [None, [], None]
|
| 141 |
|
| 142 |
def updatePassword(txt):
|
| 143 |
return [txt.lower().strip(), "*********"]
|
|
@@ -159,6 +154,11 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
|
|
| 159 |
if prompt.startswith('gpt4'):
|
| 160 |
gptModel = 'gpt-4o'
|
| 161 |
prompt = prompt[5:]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
if user_window in unames and pwd_window in pwdList:
|
| 163 |
past.append({"role":"user", "content":prompt})
|
| 164 |
completion = client.chat.completions.create(model=gptModel,
|
|
@@ -226,32 +226,68 @@ def pause_message():
|
|
| 226 |
|
| 227 |
def set_speak_button(txt):
|
| 228 |
vis = False
|
| 229 |
-
if len(txt) >
|
| 230 |
vis = True
|
| 231 |
return gr.Button(visible=vis)
|
| 232 |
|
| 233 |
-
def
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
|
|
|
|
|
|
|
|
|
| 238 |
|
| 239 |
-
def speech_worker(chunks=[]):
|
| 240 |
for chunk in chunks:
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
with gr.Blocks() as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
def initial_audio_output(txt, user):
|
| 251 |
global digits
|
| 252 |
global abbrevs
|
| 253 |
-
|
| 254 |
-
|
|
|
|
| 255 |
for s,x in abbrevs.items():
|
| 256 |
txt = txt.replace(s, x)
|
| 257 |
words_in = txt.replace('**', '').splitlines(False)
|
|
@@ -310,25 +346,25 @@ with gr.Blocks() as demo:
|
|
| 310 |
chunk = chunklist[0]
|
| 311 |
if chunk.strip() == '':
|
| 312 |
return gr.Audio(sources=None)
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
def gen_output_audio():
|
| 323 |
try:
|
| 324 |
-
fname =
|
| 325 |
except:
|
| 326 |
-
return gr.Audio(sources=None)
|
| 327 |
-
return fname
|
|
|
|
| 328 |
|
| 329 |
-
history = gr.State([])
|
| 330 |
-
password = gr.State("")
|
| 331 |
-
model = gr.State("gpt-4o-mini")
|
| 332 |
gr.Markdown('# GPT Chat')
|
| 333 |
gr.Markdown('Enter user name & password then enter prompt and click submit button. Restart conversation if topic changes')
|
| 334 |
gr.Markdown('You can enter prompts by voice. Tap Record, speak, then tap Stop.' +
|
|
@@ -336,6 +372,7 @@ with gr.Blocks() as demo:
|
|
| 336 |
# heading = gr.Label(value="GPT Chat", scale=2, color="Crimson" )
|
| 337 |
with gr.Row():
|
| 338 |
user_window = gr.Textbox(label = "User Name")
|
|
|
|
| 339 |
pwd_window = gr.Textbox(label = "Password")
|
| 340 |
pwd_window.blur(updatePassword, pwd_window, [password, pwd_window])
|
| 341 |
with gr.Row():
|
|
@@ -350,16 +387,16 @@ with gr.Blocks() as demo:
|
|
| 350 |
speak_output = gr.Button(value="Speak Dialog", visible=False)
|
| 351 |
prompt_window = gr.Textbox(label = "Prompt or Question")
|
| 352 |
output_window = gr.Textbox(label = "Dialog")
|
| 353 |
-
submit_button.click(chat, inputs=[prompt_window,
|
| 354 |
outputs=[history, output_window, prompt_window, model])
|
| 355 |
-
clear_button.click(
|
| 356 |
-
audio_widget.stop_recording(fn=transcribe, inputs=[
|
| 357 |
outputs=[prompt_window])
|
| 358 |
audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
|
| 359 |
reset_button.add(audio_widget)
|
| 360 |
audio_out = gr.Audio(autoplay=True, visible=False)
|
| 361 |
-
audio_out.stop(fn=gen_output_audio, inputs=
|
| 362 |
-
speak_output.click(fn=initial_audio_output, inputs=[output_window,
|
| 363 |
output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
|
| 364 |
-
demo.unload(
|
| 365 |
demo.launch(share=True)
|
|
|
|
| 11 |
import audioread
|
| 12 |
import queue
|
| 13 |
import threading
|
| 14 |
+
from glob import glob
|
| 15 |
+
import copy
|
| 16 |
|
| 17 |
load_dotenv(override=True)
|
| 18 |
key = os.getenv('OPENAI_API_KEY')
|
|
|
|
| 31 |
dp.mkdir(exist_ok=True)
|
| 32 |
dataDir = '/data/'
|
| 33 |
|
| 34 |
+
speak_file = dataDir + "speek.wav"
|
| 35 |
|
| 36 |
client = OpenAI(api_key = key)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']
|
| 39 |
|
| 40 |
abbrevs = {'St. ' : 'Saint ', 'Mr. ': 'mister ', 'Mrs. ':'mussus ', 'Mr. ':'mister ', 'Ms. ':'mizz '}
|
|
|
|
| 130 |
result.append(['totals', f'{ttotal4mini_in}/{ttotal4mini_out}', f'{ttotal4o_in}/{ttotal4o_out}', f'audio:{totalAudio}',f'speech:{totalSpeech}'])
|
| 131 |
return result
|
| 132 |
|
| 133 |
+
def new_conversation(user):
    """Start a fresh conversation for ``user``.

    Removes the user's generated speech files via ``clean_up`` and returns
    blank values for the UI.

    Returns:
        A four-element list: ``None``, an empty list, ``None``, an empty list.
    """
    # NOTE(review): four values are returned; presumably they map onto the
    # prompt, history, dialog and audio-queue outputs -- confirm the click
    # handler that uses this function registers four outputs.
    clean_up(user)
    cleared = [None, [], None, []]
    return cleared
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
def updatePassword(txt):
    """Normalize a typed password and supply a mask for the textbox.

    Returns:
        A two-element list: the lower-cased, whitespace-trimmed password,
        followed by a fixed string of asterisks to display in its place.
    """
    normalized = txt.lower().strip()
    mask = "*********"
    return [normalized, mask]
|
|
|
|
| 154 |
if prompt.startswith('gpt4'):
|
| 155 |
gptModel = 'gpt-4o'
|
| 156 |
prompt = prompt[5:]
|
| 157 |
+
if prompt.startswith("clean"):
|
| 158 |
+
user = prompt[6:]
|
| 159 |
+
response = f'cleaned all .wav files for {user}'
|
| 160 |
+
final_clean_up(user)
|
| 161 |
+
return [past, response, None, gptModel]
|
| 162 |
if user_window in unames and pwd_window in pwdList:
|
| 163 |
past.append({"role":"user", "content":prompt})
|
| 164 |
completion = client.chat.completions.create(model=gptModel,
|
|
|
|
| 226 |
|
| 227 |
def set_speak_button(txt):
    """Show the speak button only when ``txt`` is longer than two characters.

    Returns:
        A ``gr.Button`` update whose ``visible`` flag reflects that check.
    """
    show_button = len(txt) > 2
    return gr.Button(visible=show_button)
|
| 232 |
|
| 233 |
+
def update_user(txt):
    """Normalize a typed user name for storage in session state.

    The name is lower-cased and stripped of surrounding whitespace, the same
    normalization ``updatePassword`` applies to passwords. Without the strip,
    a stray leading or trailing space would end up inside the generated
    ``<user>_speech<N>.wav`` file names.

    Args:
        txt: Raw contents of the user-name textbox.

    Returns:
        The normalized user name.
    """
    return txt.lower().strip()
|
| 235 |
+
|
| 236 |
+
# def clean_up(q):
|
| 237 |
+
# for fname in q:
|
| 238 |
+
# if os.path.exists(fname):
|
| 239 |
+
# os.remove(fname)
|
| 240 |
+
# return []
|
| 241 |
|
| 242 |
+
def speech_worker(chunks=None, q=None):
    """Synthesize speech for each text chunk and write it to its paired file.

    Args:
        chunks: Text chunks to convert to speech, in playback order.
        q: Output .wav paths, one per chunk, in the same order.

    Chunks and paths are paired positionally. The caller's lists are not
    modified, and pairing stops at the shorter of the two sequences.
    """
    # None defaults avoid sharing one mutable list object across calls.
    chunks = [] if chunks is None else chunks
    q = [] if q is None else q
    for chunk, fpath in zip(chunks, q):
        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85, response_format='wav')
        with open(fpath, 'wb') as fp:
            fp.write(response.content)
|
| 248 |
+
|
| 249 |
+
# def gen_speech_file(user):
|
| 250 |
+
# global speech_file_index
|
| 251 |
+
# speech_file_index += 1
|
| 252 |
+
# fname = dataDir + f'user_speech{speech_file_index}.wav'
|
| 253 |
+
# return fname
|
| 254 |
+
|
| 255 |
+
def gen_speech_file_names(user, cnt):
    """Build the list of speech file paths for one user.

    Args:
        user: User name used as the file-name prefix.
        cnt: Number of paths to generate.

    Returns:
        ``cnt`` paths of the form ``<dataDir><user>_speech<i>.wav`` for
        ``i`` in ``0 .. cnt-1``; an empty list when ``cnt`` is zero or negative.
    """
    return [dataDir + f'{user}_speech{i}.wav' for i in range(cnt)]
|
| 260 |
|
| 261 |
+
def final_clean_up(user):
    """Delete every generated speech .wav file belonging to ``user``.

    Files are matched with the pattern ``<dataDir><user>_speech*.wav``.

    Args:
        user: User name whose speech files should be removed. It is converted
            to ``str`` and glob-escaped, so wildcard characters typed into a
            user name (``*``, ``?``, ``[``) match literally instead of
            selecting other users' files.

    Returns:
        None. Files that cannot be removed are skipped.
    """
    # Local import: the file-level import only brings in the glob() function.
    from glob import escape

    pattern = dataDir + f'{escape(str(user))}_speech*.wav'
    for fpath in glob(pattern):
        try:
            os.remove(fpath)
        except OSError:
            # Best-effort cleanup: a file that is already gone or still in
            # use must not stop the remaining files from being removed.
            continue
|
| 268 |
|
| 269 |
with gr.Blocks() as demo:
|
| 270 |
+
# Per-session state shared between the event handlers.
history = gr.State([])
password = gr.State("")
# Per the gr.State documentation, delete_callback is invoked with the stored
# value when the state is deleted, so the session's speech files are removed
# for the user name actually held in this state.
user = gr.State("", delete_callback=final_clean_up)
model = gr.State("gpt-4o-mini")
# Remaining speech file paths still to be played for the current dialog.
q = gr.State([])
qsave = gr.State([])
|
| 276 |
+
|
| 277 |
+
def clean_up(user):
    """Delete the generated speech .wav files belonging to ``user``.

    Delegates to ``final_clean_up``, which removes the files matching
    ``<dataDir><user>_speech*.wav``, so the removal logic lives in one place.

    Args:
        user: User name whose speech files should be removed.
    """
    final_clean_up(user)
|
| 284 |
+
|
| 285 |
def initial_audio_output(txt, user):
|
| 286 |
global digits
|
| 287 |
global abbrevs
|
| 288 |
+
q = []
|
| 289 |
+
if len(txt.strip()) < 5:
|
| 290 |
+
return ['None', q]
|
| 291 |
for s,x in abbrevs.items():
|
| 292 |
txt = txt.replace(s, x)
|
| 293 |
words_in = txt.replace('**', '').splitlines(False)
|
|
|
|
| 346 |
chunk = chunklist[0]
|
| 347 |
if chunk.strip() == '':
|
| 348 |
return gr.Audio(sources=None)
|
| 349 |
+
fname_list = gen_speech_file_names(user, len(chunklist))
|
| 350 |
+
q = fname_list.copy()
|
| 351 |
+
qsave = fname_list.copy()
|
| 352 |
+
fname = q.pop(0)
|
| 353 |
+
if len(chunklist) > 0:
|
| 354 |
+
threading.Thread(target=speech_worker, daemon=True, args=(chunklist[1:],fname_list[1:])).start()
|
| 355 |
+
response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85, response_format='wav')
|
| 356 |
+
with open(fname, 'wb') as fp:
|
| 357 |
+
fp.write(response.content)
|
| 358 |
+
return [fname, q]
|
| 359 |
|
| 360 |
+
def gen_output_audio(q):
    """Take the next speech file off the playback queue.

    Args:
        q: List of remaining speech file paths, next to play first.

    Returns:
        A two-element list ``[next_file, remaining]``. When the queue is
        empty (or None) the result is ``[None, []]``, so the second slot
        always holds a plain list rather than a UI component.
    """
    if not q:
        return [None, []]
    # Build a new list instead of popping in place so the caller's list
    # object is left untouched.
    fname, *remaining = q
    return [fname, remaining]
|
| 366 |
+
|
| 367 |
|
|
|
|
|
|
|
|
|
|
| 368 |
gr.Markdown('# GPT Chat')
|
| 369 |
gr.Markdown('Enter user name & password then enter prompt and click submit button. Restart conversation if topic changes')
|
| 370 |
gr.Markdown('You can enter prompts by voice. Tap Record, speak, then tap Stop.' +
|
|
|
|
| 372 |
# heading = gr.Label(value="GPT Chat", scale=2, color="Crimson" )
|
| 373 |
with gr.Row():
|
| 374 |
user_window = gr.Textbox(label = "User Name")
|
| 375 |
+
user_window.blur(update_user, user_window, user)
|
| 376 |
pwd_window = gr.Textbox(label = "Password")
|
| 377 |
pwd_window.blur(updatePassword, pwd_window, [password, pwd_window])
|
| 378 |
with gr.Row():
|
|
|
|
| 387 |
speak_output = gr.Button(value="Speak Dialog", visible=False)
|
| 388 |
prompt_window = gr.Textbox(label = "Prompt or Question")
|
| 389 |
output_window = gr.Textbox(label = "Dialog")
|
| 390 |
+
submit_button.click(chat, inputs=[prompt_window, user, password, history, output_window, model],
|
| 391 |
outputs=[history, output_window, prompt_window, model])
|
| 392 |
+
clear_button.click(fn=new_conversation, inputs=user, outputs=[prompt_window, history, output_window])
|
| 393 |
+
audio_widget.stop_recording(fn=transcribe, inputs=[user, password, audio_widget],
|
| 394 |
outputs=[prompt_window])
|
| 395 |
audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
|
| 396 |
reset_button.add(audio_widget)
|
| 397 |
audio_out = gr.Audio(autoplay=True, visible=False)
|
| 398 |
+
audio_out.stop(fn=gen_output_audio, inputs=q, outputs = [audio_out, q])
|
| 399 |
+
speak_output.click(fn=initial_audio_output, inputs=[output_window, user], outputs=[audio_out, q])
|
| 400 |
output_window.change(fn=set_speak_button, inputs=output_window,outputs=speak_output)
|
| 401 |
+
demo.unload(final_clean_up(user))
|
| 402 |
demo.launch(share=True)
|