Spaces:

dlflannery
/

GradioTest

Running

App Files Files Community

dlflannery committed on Sep 13, 2024

Commit

62faa72

verified ·

1 Parent(s): b34c7b3

Update app.py

Browse files

Added image analysis

Files changed (1) hide show

app.py +111 -27

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import gradio as gr
 # import openai
@@ -159,8 +160,13 @@ def genUsageStats(do_reset=False):
     return result
 def new_conversation(user):
-    clean_up(user)
-    return [None, [], None, gr.Image(visible=False, value=None)]
 def updatePassword(txt):
     password = txt.lower().strip()
@@ -169,18 +175,17 @@ def updatePassword(txt):
 # def setModel(val):
 #     return val
-def chat(prompt, user_window, pwd_window, past, response, gptModel):
     user_window = user_window.lower().strip()
     isBoss = False
     if user_window == unames[0] and pwd_window == pwdList[0]:
         isBoss = True
         if prompt == 'stats':
             response = genUsageStats()
-            # list_permanent_files()
-            return [past, response, None, gptModel]
         if prompt == 'reset':
             response = genUsageStats(True)
-            return [past, response, None, gptModel]
         if prompt.startswith('gpt4'):
             gptModel = 'gpt-4o'
             prompt = prompt[5:]
@@ -188,15 +193,21 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
             user = prompt[6:]
             response = f'cleaned all .wav files for {user}'
             final_clean_up(user)
-            return [past, response, None, gptModel]
         if prompt.startswith('files'):
             (log_cnt, wav_cnt, other_cnt, others, log_list) = list_permanent_files()
             response = f'{log_cnt} log files\n{wav_cnt} .wav files\n{other_cnt} Other files:\n{others}\nlogs: {str(log_list)}'
-            return [past, response, None, gptModel]
     if user_window in unames and pwd_window == pwdList[unames.index(user_window)]:
         past.append({"role":"user", "content":prompt})
-        completion = client.chat.completions.create(model=gptModel,
                                             messages=past)
         reply = completion.choices[0].message.content
         tokens_in = completion.usage.prompt_tokens
         tokens_out = completion.usage.completion_tokens
@@ -222,9 +233,9 @@ def chat(prompt, user_window, pwd_window, past, response, gptModel):
                 sleep(3)
         if not accessOk:
             response += f"\nDATA LOG FAILED, path = {dataFile}"
-        return [past, response , None, gptModel]
     else:
-        return [[], "User name and/or password are incorrect", prompt, gptModel]
 def new_func(user):
     dataFile = dataDir + user + '_log.txt'
@@ -336,10 +347,14 @@ def make_image(prompt, user, pwd):
         try:
             response = client.images.generate(model='dall-e-2', prompt=prompt,size='512x512',
                quality='standard', response_format='b64_json')
             image_data = response.data[0].b64_json
-            image = Image.open(BytesIO(base64.b64decode(image_data)))
-            fpath = dataDir + user + '.png'
-            image.save(fpath)
             with open(image_count_path(user), 'at') as fp:
                 fp.write('1\n')
             msg = 'Image created!'
@@ -357,16 +372,25 @@ def show_help():
         tapping the audio "Record" button, saying your prompt, then tapping the "Stop" button.
         Your prompt will appear in the Prompt window, and you can edit it there if needed.
     3.  Chat:
-          1.1 tap the "Submit Prompt/Question" button.  The response will appear in the Dialog window.
-          1.2 To speak the response, tap the "Speak Dialog" button.
-          1.3 Enter follow-up questions in the Prompt window either by typing or speaking. Tap the voice
               entry "Reset Voice Entry" button to enable additional voice entry. Then tap "Submit Prompt/Question".
-          1.4 If topic changes or when done chatting, tap the "Restart Conversation" button.
-    4.  Make Image:
-          1.1 Enter description of desired image in prompt window via either typing or voice entry
-          1.2 Tap the "Make Image" button.  This can take a few seconds.
-          1.3 There is a download button on the image display if your system supports file downloads.
-          1.4 When done viewing image, tap the "Restart Conversation" button
     Hints:
         1. Better chat and image results are obtained by including detailed descriptions and instructions
@@ -377,6 +401,57 @@ def show_help():
              can edit what's to be spoken.  Except:  In a chat conversation, spoken dialog will only include
              the latest prompt/response ("YOU:/GPT:") sequence.'''
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -386,6 +461,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     model = gr.State("gpt-4o-mini")
     q = gr.State([])
     qsave = gr.State([])
     def clean_up(user):
         flist = glob(dataDir + f'{user}_speech*.wav')
@@ -504,14 +580,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
         #                      value="gpt-3.5-turbo", label="GPT Model", interactive=True)
         button_do_image = gr.Button(value='Make Image')
         submit_button = gr.Button(value="Submit Prompt/Question")
         speak_output = gr.Button(value="Speak Dialog", visible=False)
     prompt_window = gr.Textbox(label = "Prompt or Question")
     output_window = gr.Textbox(label = "Dialog")
-    image_window = gr.Image(visible=False)
-    submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model],
-                         outputs=[history, output_window, prompt_window, model])
-    clear_button.click(fn=new_conversation, inputs=user_window, outputs=[prompt_window, history, output_window, image_window])
     audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
                                 outputs=[prompt_window])
     audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
@@ -523,5 +605,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     button_do_image.click(fn=make_image, inputs=[prompt_window,user_window, password],outputs=[image_window, output_window])
     image_window.change(fn=delete_image, inputs=[user])
     help_button.click(fn=show_help, outputs=output_window)
     # demo.unload(final_clean_up(user))
 demo.launch(share=True)

+from ast import Interactive
 import os
 import gradio as gr
 # import openai
     return result
 def new_conversation(user):
+    clean_up(user)  # .wav files
+    flist = glob(f'{dataDir}{user}.png')
+    flist.extend(glob(f'{dataDir}{user}_image.b64'))
+    for fpath in flist:
+        if os.path.exists(fpath):
+            os.remove(fpath)
+    return [None, [], None, gr.Image(visible=False, value=None),  gr.Image(visible=False, value=None), '']
 def updatePassword(txt):
     password = txt.lower().strip()
 # def setModel(val):
 #     return val
+def chat(prompt, user_window, pwd_window, past, response, gptModel, uploaded_image_file=''):
     user_window = user_window.lower().strip()
     isBoss = False
     if user_window == unames[0] and pwd_window == pwdList[0]:
         isBoss = True
         if prompt == 'stats':
             response = genUsageStats()
+            return [past, response, None, gptModel, uploaded_image_file]
         if prompt == 'reset':
             response = genUsageStats(True)
+            return [past, response, None, gptModel, uploaded_image_file]
         if prompt.startswith('gpt4'):
             gptModel = 'gpt-4o'
             prompt = prompt[5:]
             user = prompt[6:]
             response = f'cleaned all .wav files for {user}'
             final_clean_up(user)
+            return [past, response, None, gptModel, uploaded_image_file]
         if prompt.startswith('files'):
             (log_cnt, wav_cnt, other_cnt, others, log_list) = list_permanent_files()
             response = f'{log_cnt} log files\n{wav_cnt} .wav files\n{other_cnt} Other files:\n{others}\nlogs: {str(log_list)}'
+            return [past, response, None, gptModel, uploaded_image_file]
     if user_window in unames and pwd_window == pwdList[unames.index(user_window)]:
         past.append({"role":"user", "content":prompt})
+        if uploaded_image_file == '':
+            completion = client.chat.completions.create(model=gptModel,
                                             messages=past)
+        else:
+            (completion, msg) = analyze_image(user_window, gptModel)
+            uploaded_image_file= ''
+            if not msg == 'ok':
+                return [past, msg, None, gptModel, uploaded_image_file]
         reply = completion.choices[0].message.content
         tokens_in = completion.usage.prompt_tokens
         tokens_out = completion.usage.completion_tokens
                 sleep(3)
         if not accessOk:
             response += f"\nDATA LOG FAILED, path = {dataFile}"
+        return [past, response , None, gptModel, uploaded_image_file]
     else:
+        return [[], "User name and/or password are incorrect", prompt, gptModel, uploaded_image_file]
 def new_func(user):
     dataFile = dataDir + user + '_log.txt'
         try:
             response = client.images.generate(model='dall-e-2', prompt=prompt,size='512x512',
                quality='standard', response_format='b64_json')
+        except Exception as ex:
+            msg = ex.message
+            return [gr.Image(visible=False, value=None), msg]
+        try:
             image_data = response.data[0].b64_json
+            with Image.open(BytesIO(base64.b64decode(image_data))) as image:
+                fpath = dataDir + user + '.png'
+                image.save(fpath)
             with open(image_count_path(user), 'at') as fp:
                 fp.write('1\n')
             msg = 'Image created!'
         tapping the audio "Record" button, saying your prompt, then tapping the "Stop" button.
         Your prompt will appear in the Prompt window, and you can edit it there if needed.
     3.  Chat:
+          3.1 tap the "Submit Prompt/Question" button.  The response will appear in the Dialog window.
+          3.2 To speak the response, tap the "Speak Dialog" button.
+          3.3 Enter follow-up questions in the Prompt window either by typing or speaking. Tap the voice
               entry "Reset Voice Entry" button to enable additional voice entry. Then tap "Submit Prompt/Question".
+          3.4 If topic changes or when done chatting, tap the "Restart Conversation" button.
+    4.  (OR) Make Image:
+          4.1 Enter description of desired image in prompt window via either typing or voice entry
+          4.2 Tap the "Make Image" button.  This can take a few seconds.
+          4.3 There is a download button on the image display if your system supports file downloads.
+          4.4 When done viewing image, tap the "Restart Conversation" button
+    5. (OR) Analyze an Image you provide:
+          5.1 Enter what you want to know about the image in the prompt window. You can include instructions
+               to write a poem about something in the image, for example.
+          5.2 Tap the "Upload & Analyze Image" button.
+          5.3 An empty image box will appear lower left. Drag or upload image into it. It offers web cam input
+               also but I have not tried that yet.
+          5.4 The image should appear. This can take some time with a slow internet connection and large image.
+          5.5 Tap the "Submit Prompt/Question" button to start the analysis.  This initiates a chat dialog and
+               you can ask follow-up questions.
     Hints:
         1. Better chat and image results are obtained by including detailed descriptions and instructions
              can edit what's to be spoken.  Except:  In a chat conversation, spoken dialog will only include
              the latest prompt/response ("YOU:/GPT:") sequence.'''
def upload_image(prompt, user, password):
    """Decide whether to reveal the image-upload widget.

    The widget is shown only when ``user``/``password`` match an entry in
    ``unames``/``pwdList`` and ``prompt`` is at least three characters long.

    Returns:
        ``[gr.Image update, message]``. The message is empty when the widget
        is made visible; otherwise it says why the request was refused and
        the widget stays hidden.
    """
    # The password is only looked up once the user name is known to exist.
    credentials_ok = user in unames and password == pwdList[unames.index(user)]

    if not credentials_ok:
        notice = "Incorrect user name and/or password"
    elif len(prompt) < 3:
        notice = "You must provide prompt/instructions (what to do with the image)"
    else:
        notice = ''

    # An empty notice is the only success case, so it doubles as the flag.
    return [gr.Image(visible=(notice == ''), interactive=True), notice]
def load_image(image, user):
    """Base64-encode an uploaded image and save it as ``<user>_image.b64``.

    Args:
        image: Filesystem path of the uploaded image (the upload widget is
            created with ``type='filepath'``).
        user: User name used to build the output file name under ``dataDir``.

    Returns:
        ``[fpath, 'ok']`` on success, where ``fpath`` is the file written.
        On failure, ``['', 'Unable to create base64 image']``. The empty
        path matches the initial value of the uploaded-image state, which
        ``chat`` treats as "no image pending".
    """
    status = 'ok'
    # Bind the path up front: if opening the upload fails, the return below
    # would otherwise hit an unbound local and raise instead of reporting.
    fpath = ''
    try:
        with open(image, 'rb') as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        fpath = dataDir + user + '_image.b64'
        with open(fpath, 'wt') as fp:
            fp.write(base64_image)
    except (OSError, TypeError, ValueError):
        # Missing/unreadable upload, unwritable target, or a non-path value
        # such as None. Do not hand back a path that may be absent or partial.
        fpath = ''
        status = 'Unable to create base64 image'
    return [fpath, status]
def analyze_image(user, model, prompt="What's in this image?"):
    """Send the user's stored base64 image to the chat model with a question.

    Args:
        user: User name; the image is read from ``dataDir + user + '_image.b64'``.
        model: Model name passed to ``client.chat.completions.create``.
        prompt: Question or instruction sent with the image. Defaults to the
            previously hard-coded question, so existing two-argument calls
            behave as before. An empty value falls back to the default.

    Returns:
        ``[completion, 'ok']`` on success, or
        ``[None, 'base64 image file not found']`` when the stored image
        cannot be read. Errors raised by the API call itself are not caught
        here and propagate to the caller.
    """
    status = 'ok'
    try:
        with open(dataDir + user + '_image.b64', 'rt') as fp:
            base64_image = fp.read()
    except OSError:
        # Only a missing or unreadable file is expected here; anything else
        # should surface rather than be reported as "not found".
        status = "base64 image file not found"
        return [None, status]

    question = prompt or "What's in this image?"
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": question,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The image is inlined as a data URL; the MIME type
                            # is always declared as JPEG, whatever was uploaded.
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low",
                        },
                    },
                ],
            }
        ],
        max_tokens=300,
    )
    return [completion, status]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     model = gr.State("gpt-4o-mini")
     q = gr.State([])
     qsave = gr.State([])
+    uploaded_image_file = gr.State('')
     def clean_up(user):
         flist = glob(dataDir + f'{user}_speech*.wav')
         # gpt_chooser=gr.Radio(choices=[("GPT-3.5","gpt-3.5-turbo"),("GPT-4o","gpt-4o-mini")],
         #                      value="gpt-3.5-turbo", label="GPT Model", interactive=True)
         button_do_image = gr.Button(value='Make Image')
+        button_get_image = gr.Button(value='Upload & Analyze Image')
         submit_button = gr.Button(value="Submit Prompt/Question")
         speak_output = gr.Button(value="Speak Dialog", visible=False)
     prompt_window = gr.Textbox(label = "Prompt or Question")
     output_window = gr.Textbox(label = "Dialog")
+    with gr.Row():
+        with gr.Column():
+            image_window2 = gr.Image(visible=False, interactive=True, label='Image to Analyze', type='filepath')
+        with gr.Column():
+            image_window = gr.Image(visible=False, label='Generated Image')
+    submit_button.click(chat, inputs=[prompt_window, user_window, password, history, output_window, model, uploaded_image_file],
+                         outputs=[history, output_window, prompt_window, model, uploaded_image_file])
+    clear_button.click(fn=new_conversation, inputs=user_window,
+                      outputs=[prompt_window, history, output_window, image_window, image_window2, uploaded_image_file])
     audio_widget.stop_recording(fn=transcribe, inputs=[user_window, password, audio_widget],
                                 outputs=[prompt_window])
     audio_widget.pause_recording(fn=pause_message, outputs=[prompt_window])
     button_do_image.click(fn=make_image, inputs=[prompt_window,user_window, password],outputs=[image_window, output_window])
     image_window.change(fn=delete_image, inputs=[user])
     help_button.click(fn=show_help, outputs=output_window)
+    button_get_image.click(fn=upload_image,inputs = [prompt_window, user, password], outputs = [image_window2, output_window])
+    image_window2.upload(fn=load_image, inputs=[image_window2, user], outputs=[uploaded_image_file, output_window])
     # demo.unload(final_clean_up(user))
 demo.launch(share=True)