dlflannery committed on
Commit
839c46d
·
verified ·
1 Parent(s): cb59748

Update app.py

Browse files

First ver. with gr 4.43.0

Files changed (1) hide show
  1. app.py +420 -8
app.py CHANGED
@@ -1,13 +1,425 @@
1
  import os
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def sum(a, b):
5
- return str(float(a) + float(b))
6
 
7
  with gr.Blocks() as demo:
8
- input_a = gr.Textbox(label = 'Value 1', value = '0')
9
- input_b = gr.Textbox(label = 'Value 2', value = '0')
10
- result_box = gr.Textbox(label = 'Result', value = '0')
11
- button_go = gr.Button(value = 'GO')
12
- button_go.click(fn=sum, inputs = [input_a, input_b], outputs = result_box)
13
- demo.launch(share = True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import gradio as gr
3
+ # import openai
4
+ from numpy._core.defchararray import endswith, isdecimal
5
+ from openai import OpenAI
6
+ from dotenv import load_dotenv
7
+ from pathlib import Path
8
+ from time import sleep
9
+ import audioread
10
+ import queue
11
+ import threading
12
+ from glob import glob
13
+ import copy
14
+
15
# ---------------------------------------------------------------------------
# Module configuration: credentials, data directory, OpenAI client, and the
# abbreviation table used when preparing text for speech.
# ---------------------------------------------------------------------------
# override=True lets values from the .env file win over variables already
# present in the process environment.
load_dotenv(override=True)


def _require_env(name):
    """Return environment variable *name*, failing loudly if it is unset.

    Raises:
        RuntimeError: if the variable is missing, so the problem is reported
            by name instead of as an AttributeError on None further down.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f'Required environment variable {name} is not set')
    return value


key = os.getenv('OPENAI_API_KEY')
# unames and pwdList are parallel comma-separated lists: pwdList[i] is the
# password for unames[i].  Entry 0 is the administrator account (see chat()).
users = _require_env('LOGNAME')
unames = users.split(',')
pwds = _require_env('PASSWORD')
pwdList = pwds.split(',')

# SITE == 'local' keeps data next to the app; anything else uses /data.
site = os.getenv('SITE')
dataDir = './data/' if site == 'local' else '/data/'
dp = Path(dataDir)
dp.mkdir(exist_ok=True)

speak_file = dataDir + "speek.wav"

client = OpenAI(api_key=key)

#digits = ['zero: ','one: ','two: ','three: ','four: ','five: ','six: ','seven: ','eight: ','nine: ']

# Written abbreviation -> spelling substituted before text is sent to
# text-to-speech.  (The original literal listed 'Mr. ' twice.)
abbrevs = {
    'St. ': 'Saint ',
    'Mr. ': 'mister ',
    'Mrs. ': 'mussus ',
    'Ms. ': 'mizz ',
}
39
+
40
def _read_stat_lines(fp, do_reset):
    """Read all lines of the stats file *fp*, retrying on I/O errors.

    Returns:
        The list of lines; ``[]`` when the file does not exist or when
        *do_reset* is true (the file is deleted after a successful read);
        ``None`` when three attempts all failed.
    """
    for _ in range(3):
        try:
            with open(fp) as f:
                lines = f.readlines()
            if do_reset:
                os.remove(fp)
                return []
            return lines
        except FileNotFoundError:
            # No file simply means nothing has been logged for this user.
            return []
        except OSError:
            # Presumably the file is briefly busy with a concurrent writer;
            # wait and try again.
            sleep(3)
    return None


def _sum_counter_lines(lines):
    """Sum the integer after the last ':' of each non-blank ``label:value`` line."""
    total = 0
    for line in lines:
        if line.strip():
            total += int(line.rpartition(':')[2])
    return total


def genUsageStats(do_reset=False):
    """Collect per-user and overall usage figures from the files in dataDir.

    For every name in ``unames`` this reads ``<user>_log.txt`` (lines of the
    form ``user:in/out-model``), ``<user>_audio.txt`` and
    ``<user>_speech.txt`` (lines of the form ``label:count``).

    Args:
        do_reset: when true, the stats files are deleted instead of being
            tallied, so every reported figure is zero.

    Returns:
        A list of rows ``[user, 'mini_in/mini_out', '4o_in/4o_out',
        'audio:N', 'speech:N']`` followed by a ``'totals'`` row, or an error
        string when a file could not be read or parsed.
    """
    result = []
    ttotal4o_in = ttotal4o_out = 0
    ttotal4mini_in = ttotal4mini_out = 0
    totalAudio = 0
    totalSpeech = 0
    for user in unames:
        tokens4o_in = tokens4o_out = 0
        tokens4mini_in = tokens4mini_out = 0

        lines = _read_stat_lines(dataDir + user + '_log.txt', do_reset)
        if lines is None:
            return f'File access failed reading stats for user: {user}'
        try:
            for line in lines:
                if not line.strip():
                    continue
                # 'user:in/out-model' -> token counts and model tag.
                counts, _, model_tag = line.rpartition(':')[2].partition('-')
                tin, tout = counts.split('/')
                incount = int(tin)
                outcount = int(tout)
                if 'mini' in model_tag:
                    tokens4mini_in += incount
                    tokens4mini_out += outcount
                else:
                    tokens4o_in += incount
                    tokens4o_out += outcount
        except ValueError:
            # A malformed line is deterministic, so retrying (and sleeping)
            # cannot help; report it with the established message.
            return f'File access failed reading stats for user: {user}'
        ttotal4mini_in += tokens4mini_in
        ttotal4mini_out += tokens4mini_out
        ttotal4o_in += tokens4o_in
        ttotal4o_out += tokens4o_out

        lines = _read_stat_lines(dataDir + user + '_audio.txt', do_reset)
        if lines is None:
            return f'File access failed reading audio stats for user: {user}'
        try:
            userAudio = _sum_counter_lines(lines)
        except ValueError:
            return f'File access failed reading audio stats for user: {user}'
        totalAudio += userAudio

        lines = _read_stat_lines(dataDir + user + '_speech.txt', do_reset)
        if lines is None:
            return f'File access failed reading speech stats for user: {user}'
        try:
            userSpeech = _sum_counter_lines(lines)
        except ValueError:
            return f'File access failed reading speech stats for user: {user}'
        totalSpeech += userSpeech

        result.append([user, f'{tokens4mini_in}/{tokens4mini_out}', f'{tokens4o_in}/{tokens4o_out}', f'audio:{userAudio}', f'speech:{userSpeech}'])
    result.append(['totals', f'{ttotal4mini_in}/{ttotal4mini_out}', f'{ttotal4o_in}/{ttotal4o_out}', f'audio:{totalAudio}', f'speech:{totalSpeech}'])
    return result
130
+
131
def new_conversation(user):
    """Start a fresh conversation for *user*.

    Deletes the user's generated speech files via ``clean_up`` and returns
    four reset values: ``None``, an empty list, ``None`` and an empty list.
    The first three line up with the prompt box, history and dialog box the
    restart button is wired to; the fourth is presumably meant for the
    speech queue -- confirm against the click wiring.
    """
    clean_up(user)
    cleared_prompt = None
    fresh_history = []
    cleared_dialog = None
    empty_queue = []
    return [cleared_prompt, fresh_history, cleared_dialog, empty_queue]
134
+
135
def updatePassword(txt):
    """Normalise the typed password and return it with a display mask.

    Args:
        txt: raw text from the password box; ``None`` is treated as empty.

    Returns:
        ``[password, mask]`` where *password* is *txt* lower-cased and
        stripped, and *mask* is a fixed string of asterisks to show in the
        text box instead of the password.
    """
    password = (txt or "").lower().strip()
    return [password, "*********"]
138
+
139
+ # def setModel(val):
140
+ # return val
141
+
142
def chat(prompt, user_window, pwd_window, past, response, gptModel):
    """Handle one submitted prompt: admin commands or a chat completion.

    Args:
        prompt: text of the prompt box.
        user_window: text of the user-name box (normalised here).
        pwd_window: the stored, already normalised password.
        past: conversation history as a list of ``{"role", "content"}`` dicts.
        response: current text of the dialog box; the new exchange is
            appended to it.
        gptModel: model name used for the completion.

    Returns:
        ``[history, dialog text, prompt box value, model name]``.  The prompt
        box is cleared (``None``) except when the credentials are rejected,
        in which case the history is also reset to ``[]``.

    The first configured user (``unames[0]``) is the administrator and may
    enter the commands ``stats``, ``reset``, ``gpt4 <prompt>``,
    ``clean <user|all>`` and ``files``.
    """
    # A cleared Gradio textbox may arrive as None; treat it as empty text.
    prompt = prompt or ''
    response = response or ''
    user_window = (user_window or '').lower().strip()

    isBoss = user_window == unames[0] and pwd_window == pwdList[0]
    if isBoss:
        if prompt == 'stats':
            return [past, genUsageStats(), None, gptModel]
        if prompt == 'reset':
            return [past, genUsageStats(True), None, gptModel]
        if prompt.startswith('gpt4'):
            # 'gpt4 <text>': switch model, then treat the rest as the prompt.
            gptModel = 'gpt-4o'
            prompt = prompt[5:]
        if prompt.startswith("clean"):
            user = prompt[6:]
            response = f'cleaned all .wav files for {user}'
            final_clean_up(user)
            return [past, response, None, gptModel]
        if prompt.startswith('files'):
            (log_cnt, wav_cnt, other_cnt, others) = list_permanent_files()
            response = f'{log_cnt} log files\n{wav_cnt} .wav files\n{other_cnt} Other files:\n{others}'
            return [past, response, None, gptModel]

    authorized = False
    if user_window in unames:
        idx = unames.index(user_window)
        # Guard the index: there may be fewer passwords than user names.
        authorized = idx < len(pwdList) and pwd_window == pwdList[idx]
    if not authorized:
        return [[], "User name and/or password are incorrect", prompt, gptModel]

    # Send history + new message without mutating `past` yet, so a failed
    # API call does not leave an unanswered user message in the history.
    user_msg = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(model=gptModel,
                                                messages=past + [user_msg])
    reply = completion.choices[0].message.content or ''
    tokens_in = completion.usage.prompt_tokens
    tokens_out = completion.usage.completion_tokens
    tokens = completion.usage.total_tokens
    past.append(user_msg)
    past.append({"role": "assistant", "content": reply})

    response += "\n\nYOU: " + prompt + "\nGPT: " + reply
    if isBoss:
        response += f"\n{gptModel}: tokens in/out = {tokens_in}/{tokens_out}"
    if tokens > 40000:
        response += "\n\nTHIS DIALOG IS GETTING TOO LONG. PLEASE RESTART CONVERSATION SOON."

    # Record token usage; computed before the retry loop so the failure
    # message below can always name the path.
    dataFile = new_func(user_window)
    m = '4omini' if 'mini' in gptModel else '4o'
    accessOk = False
    for _ in range(3):
        try:
            with open(dataFile, 'a') as f:
                f.write(f'{user_window}:{tokens_in}/{tokens_out}-{m}\n')
            accessOk = True
            break
        except OSError:
            sleep(3)
    if not accessOk:
        response += f"\nDATA LOG FAILED, path = {dataFile}"
    return [past, response, None, gptModel]
198
+
199
def new_func(user):
    """Return the path of *user*'s token-usage log inside dataDir."""
    return ''.join((dataDir, user, '_log.txt'))
202
+
203
def transcribe(user, pwd, fpath):
    """Transcribe a recorded audio file to text for an authenticated user.

    Args:
        user: user name as typed (normalised here).
        pwd: password (normalised here).
        fpath: path of the recorded audio file.

    Returns:
        The transcript text, ``'Bad credentials'`` when the name/password
        pair is not valid, or ``''`` when no recording was supplied.

    Side effects:
        Appends ``audio:<seconds>`` to ``<user>_audio.txt`` in dataDir when
        the recording is at least one second long.
    """
    user = (user or '').lower().strip()
    pwd = (pwd or '').lower().strip()
    # The password must be the one paired with this user (same rule as
    # chat()); merely matching *some* user's password is not enough.
    if user not in unames:
        return 'Bad credentials'
    idx = unames.index(user)
    if idx >= len(pwdList) or pwd != pwdList[idx]:
        return 'Bad credentials'
    if not fpath:
        return ''
    with audioread.audio_open(fpath) as audio:
        duration = int(audio.duration)
    if duration > 0:
        with open(dataDir + user + '_audio.txt', 'a') as f:
            f.write(f'audio:{str(duration)}\n')
    with open(fpath, 'rb') as audio_file:
        transcript = client.audio.transcriptions.create(
            model='whisper-1', file=audio_file, response_format='text')
    return str(transcript)
218
+
219
def pause_message():
    """Return the notice placed in the prompt box while recording is paused."""
    notice = "Audio input is paused. Resume or Stop as desired"
    return notice
221
+
222
+ # def gen_output_audio(txt):
223
+ # if len(txt) < 10:
224
+ # txt = "This dialog is too short to mess with!"
225
+ # response = client.audio.speech.create(model="tts-1", voice="fable", input=txt)
226
+ # with open(speak_file, 'wb') as fp:
227
+ # fp.write(response.content)
228
+ # return speak_file
229
+
230
+
231
def set_speak_button(txt):
    """Return a Button update that shows the button only for non-trivial text.

    Args:
        txt: current dialog text; ``None`` (a cleared box) counts as empty.

    Returns:
        ``gr.Button(visible=...)`` with ``visible`` true when *txt* is longer
        than two characters.
    """
    return gr.Button(visible=len(txt or '') > 2)
236
+
237
def update_user(user_win):
    """Map the typed user name onto a configured user.

    The text is lower-cased and stripped; if it equals one of ``unames`` that
    name is used, otherwise ``'unknown'``.  The result is returned twice, once
    for each output the blur handler is wired to.
    """
    typed = user_win.lower().strip()
    matched = typed if typed in unames else 'unknown'
    return [matched, matched]
245
+
246
def speech_worker(chunks=None, q=None):
    """Synthesise each text chunk to its matching .wav path.

    Args:
        chunks: texts to convert to speech, in order.
        q: output file paths, one per chunk, in the same order.  The list is
            read, not consumed.

    Pairs are processed until either sequence runs out.  Each chunk is sent
    to the text-to-speech endpoint and the returned audio bytes are written
    to the paired path.
    """
    for chunk, fpath in zip(chunks or [], q or []):
        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunk, speed=0.85, response_format='wav')
        with open(fpath, 'wb') as fp:
            fp.write(response.content)
252
+
253
def gen_speech_file_names(user, cnt):
    """Return *cnt* speech file paths ``<dataDir><user>_speech<i>.wav`` for i = 0..cnt-1."""
    return [dataDir + f'{user}_speech{idx}.wav' for idx in range(cnt)]
258
+
259
def final_clean_up(user):
    """Delete generated speech .wav files from dataDir.

    Args:
        user: a user name, or ``'all'`` (any case, surrounding blanks
            ignored) to delete every user's speech files.

    Deletion is best effort: a file that cannot be removed is skipped.
    """
    # Function-scope import: the module only imports glob.glob itself.
    from glob import escape

    if user.strip().lower() == 'all':
        pattern = dataDir + '*_speech*.wav'
    else:
        # Escape the name so wildcard characters in it are matched literally
        # and cannot widen the deletion to other users' files.
        pattern = escape(dataDir + user) + '_speech*.wav'
    for fpath in glob(pattern):
        try:
            os.remove(fpath)
        except OSError:
            continue
269
+
270
+
271
def list_permanent_files():
    """Summarise the contents of dataDir by file type.

    Returns:
        A 4-tuple of strings: number of ``.txt`` files, number of ``.wav``
        files, number of other entries, and the ``str()`` of the list of
        those other entry names.
    """
    entries = os.listdir(dataDir)
    txt_total = sum(1 for name in entries if name.endswith('.txt'))
    wav_total = sum(1 for name in entries if name.endswith('.wav'))
    leftovers = [name for name in entries if not name.endswith(('.txt', '.wav'))]
    return (str(txt_total), str(wav_total), str(len(leftovers)), str(leftovers))
286
 
 
 
287
 
288
with gr.Blocks() as demo:
    # Per-session state.
    history = gr.State([])            # chat messages sent to the model
    password = gr.State("")           # normalised password (box shows a mask)
    user = gr.State("unknown")        # matched user name
    model = gr.State("gpt-4o-mini")   # model used for completions
    q = gr.State([])                  # speech files still waiting to be played
    qsave = gr.State([])

    def clean_up(user):
        """Delete *user*'s generated speech .wav files (best effort)."""
        # Function-scope import: the module only imports glob.glob itself.
        from glob import escape

        # Escape the name so wildcard characters in it match literally.
        for fpath in glob(escape(dataDir + user) + '_speech*.wav'):
            try:
                os.remove(fpath)
            except OSError:
                continue

    def _speech_chunks(txt):
        """Split dialog text into short phrases, one per text-to-speech request.

        Abbreviations from ``abbrevs`` are expanded, ``**`` markers and leading
        bullet characters are dropped, a leading item number such as ``3.`` or
        ``1.5`` is rewritten for speech, and ``.`` between two digits becomes
        `` point ``.  Lines longer than 50 characters are split at periods.
        """
        for abbr, spoken in abbrevs.items():
            txt = txt.replace(abbr, spoken)
        chunklist = []
        for raw in txt.replace('**', '').splitlines(False):
            s = raw.lstrip('- *@#$%^&_=+-')
            if s.strip() == '':
                continue
            number = ''
            chunk = s
            # find() rather than index(): a one-word line has no space.
            loc = s.find(' ')
            if loc > 1:
                val = s[:loc]
                # A first token made only of digits and dots is an item number.
                if val.replace('.', '0').isdecimal():
                    if val.endswith('.'):
                        number = val[:-1].replace('.', ' point ') + '.'
                    else:
                        number = val.replace('.', ' point ')
                    chunk = ', ' + s[loc:]
            # Decimal points inside the text.  Replace from the right so the
            # indices collected on the unmodified string stay valid.
            locs = [i for i in range(1, len(chunk) - 1)
                    if chunk[i] == '.' and chunk[i - 1].isdecimal() and chunk[i + 1].isdecimal()]
            for i in reversed(locs):
                chunk = chunk[:i] + ' point ' + chunk[i + 1:]
            pieces = chunk.split('.') if len(chunk) > 50 else [chunk]
            # The item number is spoken once, in front of the first piece.
            pieces[0] = number + pieces[0]
            for piece in pieces:
                # Skip blank pieces and a stray closing quote.
                if piece.strip() and piece != '"':
                    chunklist.append(piece)
        return chunklist

    def initial_audio_output(txt, user):
        """Start speaking the dialog text.

        The first phrase is synthesised immediately; the remaining phrases are
        generated by ``speech_worker`` in a background thread.

        Args:
            txt: dialog text to speak.
            user: user name; must be one of ``unames``.

        Returns:
            ``[audio value, queue]``: the first .wav path (or ``None`` when
            there is nothing to speak) and the list of .wav paths still to be
            played.

        Side effects:
            Removes the user's previous speech files and appends
            ``speech:<characters>`` to ``<user>_speech.txt``.
        """
        if user not in unames:
            return [gr.Audio(sources=None), []]
        clean_up(user)
        if txt is None or len(txt.strip()) < 5:
            return [None, []]
        chunklist = _speech_chunks(txt)
        if not chunklist:
            return [None, []]
        total_speech = sum(len(chunk) for chunk in chunklist)
        with open(dataDir + user + '_speech.txt', 'a') as f:
            f.write(f'speech:{str(total_speech)}\n')
        fname_list = gen_speech_file_names(user, len(chunklist))
        fname = fname_list[0]
        remaining = fname_list[1:]
        if remaining:
            # The worker gets its own copy so it cannot alter the queue
            # returned to the UI.
            threading.Thread(target=speech_worker, daemon=True,
                             args=(chunklist[1:], list(remaining))).start()
        response = client.audio.speech.create(model="tts-1", voice="fable", input=chunklist[0], speed=0.85, response_format='wav')
        with open(fname, 'wb') as fp:
            fp.write(response.content)
        return [fname, remaining]

    def gen_output_audio(q, user):
        """Advance playback to the next queued speech file.

        Returns ``[next file path, remaining queue]``.  When the queue is
        empty the user's speech files are deleted and ``[None, []]`` is
        returned.
        """
        if not q:
            final_clean_up(user)
            return [None, []]
        fname = q.pop(0)
        return [fname, q]

    gr.Markdown('# GPT Chat')
    gr.Markdown('Enter user name & password then enter prompt and click submit button. Restart conversation if topic changes. ' +
                'You can enter prompts by voice. Tap "Record", speak, then tap "Stop". ' +
                'Tap "Reset Voice Entry" to enter more voice. Tap "Speak Dialog" to hear dialog. ' +
                'Note: first voice response may take a longer time.')
    with gr.Row():
        user_window = gr.Textbox(label = "User Name")
        user_window.blur(fn=update_user, inputs=user_window, outputs=[user, user_window])
        pwd_window = gr.Textbox(label = "Password")
        # NOTE(review): the box is overwritten with a mask on blur, so leaving
        # the box a second time without retyping stores the mask itself as
        # the password.
        pwd_window.blur(updatePassword, inputs = pwd_window, outputs = [password, pwd_window])
    with gr.Row():
        audio_widget = gr.Audio(type='filepath', format='wav', waveform_options=gr.WaveformOptions(
            show_recording_waveform=True), sources=['microphone'], scale = 3, label="Prompt/Question Voice Entry", max_length=120)
        reset_button = gr.ClearButton(value="Reset Voice Entry", scale=1)  #new_func1()
    with gr.Row():
        clear_button = gr.Button(value="Restart Conversation")
        # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
        #     value="gpt-3.5-turbo", label="GPT Model", interactive=True)
        submit_button = gr.Button(value="Submit Prompt/Question")
        speak_output = gr.Button(value="Speak Dialog", visible=False)
    prompt_window = gr.Textbox(label = "Prompt or Question")
    output_window = gr.Textbox(label = "Dialog")
    submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model],
                        outputs=[history, output_window, prompt_window, model])
    # new_conversation returns four values; the fourth (an empty list) resets
    # the pending speech queue.
    clear_button.click(fn=new_conversation, inputs=user_window,
                       outputs=[prompt_window, history, output_window, q])
    audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
                                outputs=[prompt_window])
    audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
    reset_button.add(audio_widget)
    audio_out = gr.Audio(autoplay=True, visible=False)
    # When one clip finishes playing, load the next queued clip.
    audio_out.stop(fn=gen_output_audio, inputs=[q, user_window], outputs = [audio_out, q])
    speak_output.click(fn=initial_audio_output, inputs=[output_window, user_window], outputs=[audio_out, q])
    output_window.change(fn=set_speak_button, inputs=output_window, outputs=speak_output)
    # demo.unload(final_clean_up(user))
demo.launch(share=True)