dlflannery committed on
Commit
62faa72
·
verified ·
1 Parent(s): b34c7b3

Update app.py

Browse files

Added image analysis

Files changed (1) hide show
  1. app.py +111 -27
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import gradio as gr
3
  # import openai
@@ -159,8 +160,13 @@ def genUsageStats(do_reset=False):
159
  return result
160
 
161
  def new_conversation(user):
162
- clean_up(user)
163
- return [None, [], None, gr.Image(visible=False, value=None)]
 
 
 
 
 
164
 
165
  def updatePassword(txt):
166
  password = txt.lower().strip()
@@ -169,18 +175,17 @@ def updatePassword(txt):
169
  # def setModel(val):
170
  # return val
171
 
172
- def chat(prompt, user_window, pwd_window, past, response, gptModel):
173
  user_window = user_window.lower().strip()
174
  isBoss = False
175
  if user_window == unames[0] and pwd_window == pwdList[0]:
176
  isBoss = True
177
  if prompt == 'stats':
178
  response = genUsageStats()
179
- # list_permanent_files()
180
- return [past, response, None, gptModel]
181
  if prompt == 'reset':
182
  response = genUsageStats(True)
183
- return [past, response, None, gptModel]
184
  if prompt.startswith('gpt4'):
185
  gptModel = 'gpt-4o'
186
  prompt = prompt[5:]
@@ -188,15 +193,21 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
188
  user = prompt[6:]
189
  response = f'cleaned all .wav files for {user}'
190
  final_clean_up(user)
191
- return [past, response, None, gptModel]
192
  if prompt.startswith('files'):
193
  (log_cnt, wav_cnt, other_cnt, others, log_list) = list_permanent_files()
194
  response = f'{log_cnt} log files\n{wav_cnt} .wav files\n{other_cnt} Other files:\n{others}\nlogs: {str(log_list)}'
195
- return [past, response, None, gptModel]
196
  if user_window in unames and pwd_window == pwdList[unames.index(user_window)]:
197
  past.append({"role":"user", "content":prompt})
198
- completion = client.chat.completions.create(model=gptModel,
 
199
  messages=past)
 
 
 
 
 
200
  reply = completion.choices[0].message.content
201
  tokens_in = completion.usage.prompt_tokens
202
  tokens_out = completion.usage.completion_tokens
@@ -222,9 +233,9 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
222
  sleep(3)
223
  if not accessOk:
224
  response += f"\nDATA LOG FAILED, path = {dataFile}"
225
- return [past, response , None, gptModel]
226
  else:
227
- return [[], "User name and/or password are incorrect", prompt, gptModel]
228
 
229
  def new_func(user):
230
  dataFile = dataDir + user + '_log.txt'
@@ -336,10 +347,14 @@ def make_image(prompt, user, pwd):
336
  try:
337
  response = client.images.generate(model='dall-e-2', prompt=prompt,size='512x512',
338
  quality='standard', response_format='b64_json')
 
 
 
 
339
  image_data = response.data[0].b64_json
340
- image = Image.open(BytesIO(base64.b64decode(image_data)))
341
- fpath = dataDir + user + '.png'
342
- image.save(fpath)
343
  with open(image_count_path(user), 'at') as fp:
344
  fp.write('1\n')
345
  msg = 'Image created!'
@@ -357,16 +372,25 @@ def show_help():
357
  tapping the audio "Record" button, saying your prompt, then tapping the "Stop" button.
358
  Your prompt will appear in the Prompt window, and you can edit it there if needed.
359
  3. Chat:
360
- 1.1 tap the "Submit Prompt/Question" button. The response will appear in the Dialog window.
361
- 1.2 To speak the response, tap the "Speak Dialog" button.
362
- 1.3 Enter follow-up questions in the Prompt window either by typing or speaking. Tap the voice
363
  entry "Reset Voice Entry" button to enable additional voice entry. Then tap "Submit Prompt/Question".
364
- 1.4 If topic changes or when done chatting, tap the "Restart Conversation" button.
365
- 4. Make Image:
366
- 1.1 Enter description of desired image in prompt window via either typing or voice entry
367
- 1.2 Tap the "Make Image" button. This can take a few seconds.
368
- 1.3 There is a download button on the image display if your system supports file downloads.
369
- 1.4 When done viewing image, tap the "Restart Conversation" button
 
 
 
 
 
 
 
 
 
370
 
371
  Hints:
372
  1. Better chat and image results are obtained by including detailed descriptions and instructions
@@ -377,6 +401,57 @@ def show_help():
377
  can edit what's to be spoken. Except: In a chat conversation, spoken dialog will only include
378
  the latest prompt/response ("YOU:/GPT:") sequence.'''
379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
 
382
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -386,6 +461,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
386
  model = gr.State("gpt-4o-mini")
387
  q = gr.State([])
388
  qsave = gr.State([])
 
389
 
390
  def clean_up(user):
391
  flist = glob(dataDir + f'{user}_speech*.wav')
@@ -504,14 +580,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
504
  # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
505
  # value="gpt-3.5-turbo", label="GPT Model", interactive=True)
506
  button_do_image = gr.Button(value='Make Image')
 
507
  submit_button = gr.Button(value="Submit Prompt/Question")
508
  speak_output = gr.Button(value="Speak Dialog", visible=False)
509
  prompt_window = gr.Textbox(label = "Prompt or Question")
510
  output_window = gr.Textbox(label = "Dialog")
511
- image_window = gr.Image(visible=False)
512
- submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model],
513
- outputs=[history, output_window, prompt_window, model])
514
- clear_button.click(fn=new_conversation, inputs=user_window, outputs=[prompt_window, history, output_window, image_window])
 
 
 
 
 
515
  audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
516
  outputs=[prompt_window])
517
  audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
@@ -523,5 +605,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
523
  button_do_image.click(fn=make_image, inputs=[prompt_window,user_window, password],outputs=[image_window, output_window])
524
  image_window.change(fn=delete_image, inputs=[user])
525
  help_button.click(fn=show_help, outputs=output_window)
 
 
526
  # demo.unload(final_clean_up(user))
527
  demo.launch(share=True)
 
1
+ from ast import Interactive
2
  import os
3
  import gradio as gr
4
  # import openai
 
160
  return result
161
 
162
def new_conversation(user):
    """Discard a user's temporary files and return cleared values for the UI.

    Removes the user's speech recordings (via clean_up), the generated
    image ``<user>.png`` and the cached upload ``<user>_image.b64`` from
    dataDir.

    Returns:
        A six-item list: None, an empty list, None, two hidden and emptied
        gr.Image updates, and '' (the "no uploaded image" marker).
    """
    clean_up(user)  # .wav files
    # Collect every matching path first, then delete, so both patterns are
    # expanded before anything is removed.
    patterns = (f'{dataDir}{user}.png', f'{dataDir}{user}_image.b64')
    leftovers = [path for pattern in patterns for path in glob(pattern)]
    for path in leftovers:
        if os.path.exists(path):
            os.remove(path)
    generated_image = gr.Image(visible=False, value=None)
    uploaded_image = gr.Image(visible=False, value=None)
    return [None, [], None, generated_image, uploaded_image, '']
170
 
171
  def updatePassword(txt):
172
  password = txt.lower().strip()
 
175
  # def setModel(val):
176
  # return val
177
 
178
+ def chat(prompt, user_window, pwd_window, past, response, gptModel, uploaded_image_file=''):
179
  user_window = user_window.lower().strip()
180
  isBoss = False
181
  if user_window == unames[0] and pwd_window == pwdList[0]:
182
  isBoss = True
183
  if prompt == 'stats':
184
  response = genUsageStats()
185
+ return [past, response, None, gptModel, uploaded_image_file]
 
186
  if prompt == 'reset':
187
  response = genUsageStats(True)
188
+ return [past, response, None, gptModel, uploaded_image_file]
189
  if prompt.startswith('gpt4'):
190
  gptModel = 'gpt-4o'
191
  prompt = prompt[5:]
 
193
  user = prompt[6:]
194
  response = f'cleaned all .wav files for {user}'
195
  final_clean_up(user)
196
+ return [past, response, None, gptModel, uploaded_image_file]
197
  if prompt.startswith('files'):
198
  (log_cnt, wav_cnt, other_cnt, others, log_list) = list_permanent_files()
199
  response = f'{log_cnt} log files\n{wav_cnt} .wav files\n{other_cnt} Other files:\n{others}\nlogs: {str(log_list)}'
200
+ return [past, response, None, gptModel, uploaded_image_file]
201
  if user_window in unames and pwd_window == pwdList[unames.index(user_window)]:
202
  past.append({"role":"user", "content":prompt})
203
+ if uploaded_image_file == '':
204
+ completion = client.chat.completions.create(model=gptModel,
205
  messages=past)
206
+ else:
207
+ (completion, msg) = analyze_image(user_window, gptModel)
208
+ uploaded_image_file= ''
209
+ if not msg == 'ok':
210
+ return [past, msg, None, gptModel, uploaded_image_file]
211
  reply = completion.choices[0].message.content
212
  tokens_in = completion.usage.prompt_tokens
213
  tokens_out = completion.usage.completion_tokens
 
233
  sleep(3)
234
  if not accessOk:
235
  response += f"\nDATA LOG FAILED, path = {dataFile}"
236
+ return [past, response , None, gptModel, uploaded_image_file]
237
  else:
238
+ return [[], "User name and/or password are incorrect", prompt, gptModel, uploaded_image_file]
239
 
240
  def new_func(user):
241
  dataFile = dataDir + user + '_log.txt'
 
347
  try:
348
  response = client.images.generate(model='dall-e-2', prompt=prompt,size='512x512',
349
  quality='standard', response_format='b64_json')
350
+ except Exception as ex:
351
+ msg = ex.message
352
+ return [gr.Image(visible=False, value=None), msg]
353
+ try:
354
  image_data = response.data[0].b64_json
355
+ with Image.open(BytesIO(base64.b64decode(image_data))) as image:
356
+ fpath = dataDir + user + '.png'
357
+ image.save(fpath)
358
  with open(image_count_path(user), 'at') as fp:
359
  fp.write('1\n')
360
  msg = 'Image created!'
 
372
  tapping the audio "Record" button, saying your prompt, then tapping the "Stop" button.
373
  Your prompt will appear in the Prompt window, and you can edit it there if needed.
374
  3. Chat:
375
+ 3.1 tap the "Submit Prompt/Question" button. The response will appear in the Dialog window.
376
+ 3.2 To speak the response, tap the "Speak Dialog" button.
377
+ 3.3 Enter follow-up questions in the Prompt window either by typing or speaking. Tap the voice
378
  entry "Reset Voice Entry" button to enable additional voice entry. Then tap "Submit Prompt/Question".
379
+ 3.4 If topic changes or when done chatting, tap the "Restart Conversation" button.
380
+ 4. (OR) Make Image:
381
+ 4.1 Enter description of desired image in prompt window via either typing or voice entry
382
+ 4.2 Tap the "Make Image" button. This can take a few seconds.
383
+ 4.3 There is a download button on the image display if your system supports file downloads.
384
+ 4.4 When done viewing image, tap the "Restart Conversation" button
385
+ 5. (OR) Analyze an Image you provide:
386
+ 5.1 Enter what you want to know about the image in the prompt window. You can include instructions
387
+ to write a poem about something in the image, for example.
388
+ 5.2 Tap the "Upload & Analyze Image" button.
389
+ 5.3 An empty image box will appear lower left. Drag or upload image into it. It offers web cam input
390
+ also but I have not tried that yet.
391
+ 5.4 The image should appear. This can take some time with a slow internet connection and large image.
392
+ 5.5 Tap the "Submit Prompt/Question" button to start the analysis. This initiates a chat dialog and
393
+ you can ask follow-up questions.
394
 
395
  Hints:
396
  1. Better chat and image results are obtained by including detailed descriptions and instructions
 
401
  can edit what's to be spoken. Except: In a chat conversation, spoken dialog will only include
402
  the latest prompt/response ("YOU:/GPT:") sequence.'''
403
 
404
def upload_image(prompt, user, password):
    """Reveal the image-upload box once credentials and prompt are acceptable.

    Args:
        prompt: Instructions describing what to do with the image; must be
            at least three characters long.
        user: User name, looked up in unames.
        password: Password, compared with the pwdList entry for that user.

    Returns:
        A two-item list: a gr.Image update (visible only when every check
        passes) and a message string, which is '' on success.
    """
    # The index lookup only runs when the user is known (short-circuit).
    authorized = user in unames and password == pwdList[unames.index(user)]
    if authorized and len(prompt) >= 3:
        return [gr.Image(visible=True, interactive=True), '']
    if authorized:
        message = "You must provide prompt/instructions (what to do with the image)"
    else:
        message = "Incorrect user name and/or password"
    return [gr.Image(visible=False, interactive=True), message]
410
+
411
def load_image(image, user):
    """Base64-encode an uploaded image and cache it on disk for analysis.

    Args:
        image: Filesystem path of the uploaded image file.
        user: User name; the encoded text is written to
            ``dataDir + user + '_image.b64'``.

    Returns:
        A two-item list ``[fpath, status]``. On success ``fpath`` is the
        path of the written ``.b64`` file and ``status`` is 'ok'. On any
        failure ``fpath`` is '' and ``status`` is
        'Unable to create base64 image'.
    """
    # '' is the "no image pending" marker, so a failed upload must not
    # leave a path behind (previously the path variable could be unbound
    # here and the function crashed instead of reporting the failure).
    failure = ['', 'Unable to create base64 image']
    try:
        with open(image, 'rb') as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
    except (OSError, TypeError, ValueError):
        # Missing/unreadable file, or a non-path value such as None.
        return failure

    fpath = dataDir + user + '_image.b64'
    try:
        with open(fpath, 'wt') as fp:
            fp.write(base64_image)
    except OSError:
        return failure
    return [fpath, 'ok']
422
+
423
def analyze_image(user, model, prompt="What's in this image?", max_tokens=300,
                  detail="low"):
    """Send the user's cached base64 image to the chat model with a question.

    Args:
        user: User name; the image text is read from
            ``dataDir + user + '_image.b64'``.
        model: Model name passed to the chat-completions call.
        prompt: Question or instructions sent with the image. An empty
            value falls back to the default question.
        max_tokens: Upper bound on the reply length.
        detail: Image detail level passed in the ``image_url`` part.

    Returns:
        A two-item list ``[completion, status]``. When the cached image
        cannot be read, ``completion`` is None and ``status`` is
        'base64 image file not found'; otherwise ``status`` is 'ok'.

    Errors raised by the chat-completions call itself are not caught here.
    """
    try:
        with open(dataDir + user + '_image.b64', 'rt') as fp:
            base64_image = fp.read()
    except (OSError, TypeError, ValueError):
        # Only file-access problems (or an unusable user name) map to this
        # status; unrelated errors are no longer hidden behind it.
        return [None, "base64 image file not found"]

    question = prompt or "What's in this image?"
    # NOTE(review): the data URL always declares image/jpeg, whatever format
    # was uploaded - confirm the API accepts other formats under this label.
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}",
            "detail": detail,
        },
    }
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [{"type": "text", "text": question}, image_part],
            }
        ],
        max_tokens=max_tokens,
    )
    return [completion, 'ok']
455
 
456
 
457
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
461
  model = gr.State("gpt-4o-mini")
462
  q = gr.State([])
463
  qsave = gr.State([])
464
+ uploaded_image_file = gr.State('')
465
 
466
  def clean_up(user):
467
  flist = glob(dataDir + f'{user}_speech*.wav')
 
580
  # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
581
  # value="gpt-3.5-turbo", label="GPT Model", interactive=True)
582
  button_do_image = gr.Button(value='Make Image')
583
+ button_get_image = gr.Button(value='Upload & Analyze Image')
584
  submit_button = gr.Button(value="Submit Prompt/Question")
585
  speak_output = gr.Button(value="Speak Dialog", visible=False)
586
  prompt_window = gr.Textbox(label = "Prompt or Question")
587
  output_window = gr.Textbox(label = "Dialog")
588
+ with gr.Row():
589
+ with gr.Column():
590
+ image_window2 = gr.Image(visible=False, interactive=True, label='Image to Analyze', type='filepath')
591
+ with gr.Column():
592
+ image_window = gr.Image(visible=False, label='Generated Image')
593
+ submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model, uploaded_image_file],
594
+ outputs=[history, output_window, prompt_window, model, uploaded_image_file])
595
+ clear_button.click(fn=new_conversation, inputs=user_window,
596
+ outputs=[prompt_window, history, output_window, image_window, image_window2, uploaded_image_file])
597
  audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
598
  outputs=[prompt_window])
599
  audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
 
605
  button_do_image.click(fn=make_image, inputs=[prompt_window,user_window, password],outputs=[image_window, output_window])
606
  image_window.change(fn=delete_image, inputs=[user])
607
  help_button.click(fn=show_help, outputs=output_window)
608
+ button_get_image.click(fn=upload_image,inputs = [prompt_window, user, password], outputs = [image_window2, output_window])
609
+ image_window2.upload(fn=load_image, inputs=[image_window2, user], outputs=[uploaded_image_file, output_window])
610
  # demo.unload(final_clean_up(user))
611
  demo.launch(share=True)