Niki Zhang committed: Update app.py
Function update

app.py CHANGED
@@ -463,13 +463,13 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
-    artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
+    # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
     paragraph = get_image_gpt(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")

     state = [(None, 'Received new image, resize it to width {} and height {}: '.format(image_input.size[0], image_input.size[1]))]

     return state, state, image_input, click_state, image_input, image_input, image_input, image_embedding, \
-        original_size, input_size,
+        original_size, input_size, f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",paragraph


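upload_callback now returns nine extra values (the name/artist/year/material strings for each tab's label row, plus paragraph), so the return tuple has to stay aligned, value for value, with the outputs list that the upload events pass further down in this commit. A quick hypothetical sanity check of that alignment, with component names taken from the event wiring below:

```python
# Hypothetical alignment check: each returned value fills the output component at the
# same position. The names mirror this commit's return statement and upload() wiring.
returns = ["state", "state", "image_input", "click_state", "image_input", "image_input",
           "image_input", "image_embedding", "original_size", "input_size",
           "Name", "Artist", "Year", "Material",          # -> click-tab label buttons
           "Name", "Artist", "Year", "Material",          # -> base-tab label buttons
           "paragraph"]
outputs = ["chatbot", "state", "origin_image", "click_state", "image_input", "image_input_base",
           "sketcher_input", "image_embedding", "original_size", "input_size",
           "name_label", "artist_label", "year_label", "material_label",
           "name_label_base", "artist_label_base", "year_label_base", "material_label_base",
           "paragraph"]
assert len(returns) == len(outputs) == 19  # 19 values feed 19 components, positionally
```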
@@ -512,7 +512,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
     text = out['generated_captions']['raw_caption']
     input_mask = np.array(out['mask'].convert('P'))
-
+    image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
+    image_input_withbackground=mask_painter(np.array(image_input), input_mask)

     click_index_state = click_index
     input_mask_state = input_mask
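mask_painter is defined elsewhere in the repository and is not part of this diff, so its exact behaviour is not visible here. Going only by the new variable names, background_alpha appears to control how much of the unmasked area remains visible, with 0 keeping just the clicked object. A rough numpy illustration under that assumption, not the repository's implementation:

```python
import numpy as np

def mask_painter_sketch(image: np.ndarray, mask: np.ndarray,
                        background_alpha: float = 1.0) -> np.ndarray:
    """Illustrative stand-in only: keep the masked object untouched and scale the
    brightness of everything else by background_alpha (0 blacks the background out)."""
    out = image.astype(np.float32)
    background = mask == 0          # pixels outside the selected object
    out[background] *= background_alpha
    return out.astype(np.uint8)

# image_object_only = mask_painter_sketch(np.array(image_input), input_mask, background_alpha=0)
```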
@@ -531,7 +532,7 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     print(generated_caption)
     print("new crop save",new_crop_save_path)

-    yield state, state, click_state,
+    yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path


@@ -545,11 +546,11 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     input_mask = input_mask_state
     input_points = input_points_state
     input_labels = input_labels_state
-    out = out_state
     focus_map = {
-        "
-        "
-        "
+        "CFV-D":0,
+        "CFV-DA":1,
+        "PFV-DA":2,
+        "PFV-DAI":3
     }

     mapped_value = focus_map.get(focus_type, -1)
@@ -565,6 +566,7 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     prompt_list = [
         'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}, you have to select sentences from wiki caption that describe the surrounding objects that may be associated with the picture object. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.'
     ]

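The remapped focus_map keys match the new Radio choices added later in this commit, and mapped_value indexes into prompt_list. The code that actually formats the chosen template is outside this hunk, so the fill-in step below is an assumption for illustration:

```python
# Hypothetical end-to-end selection of a focus prompt (template text copied from the diff).
focus_map = {"CFV-D": 0, "CFV-DA": 1, "PFV-DA": 2, "PFV-DAI": 3}
prompt_list = [
    'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
    # ... the remaining three templates shown in the hunk above ...
]

focus_type = "CFV-D"
mapped_value = focus_map.get(focus_type, -1)   # -1 when the radio value is unknown
if mapped_value != -1:
    prompt = prompt_list[mapped_value].format(
        Wiki_caption="A cypress tree under a swirling night sky.",
        length=40, sentiment="positive", language="English")
    print(prompt)
```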
@@ -596,14 +598,14 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
                                 input_points=input_points, input_labels=input_labels)

     if generated_caption:
-        state = state + [(None, f"RAW_Caption: {generated_caption}")]
+        # state = state + [(None, f"RAW_Caption: {generated_caption}")]


     if not args.disable_gpt and text_refiner:
         print("new crop save",new_crop_save_path)
         focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)

-        state = state + [(None, f"Wiki: {paragraph}")]
+        # state = state + [(None, f"Wiki: {paragraph}")]
         state = state + [(None, f"Focus_Caption: {focus_info}")]
         print("new_cap",focus_info)
         refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
@@ -765,6 +767,24 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""

+
+def export_chat_log(chat_state):
+    try:
+        if not chat_state:
+            return None
+        chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
+        print("export log...")
+        print("chat_log",chat_log)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
+            temp_file.write(chat_log.encode('utf-8'))
+            temp_file_path = temp_file.name
+        print(temp_file_path)
+        return temp_file_path
+    except Exception as e:
+        print(f"An error occurred while exporting the chat log: {e}")
+        return None
+
+
 def cap_everything(image_input, visual_chatgpt, text_refiner,input_language, input_audio, input_mic, use_mic, agree):

     model = build_caption_anything_with_models(
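Assuming the new helper above is in scope (it relies on tempfile being imported at the top of app.py), a typical call with a Gradio-style chat state looks like this; the returned path is what the gr.File download widget added later in this commit receives:

```python
chat_state = [
    (None, "Received new image, resize it to width 512 and height 512: "),
    ("What is shown here?", "Focus_Caption: A cypress tree under a swirling sky."),
]
log_path = export_chat_log(chat_state)   # writes the joined entries to a .txt temp file
print(log_path)                          # e.g. a path like /tmp/tmpab12cd34.txt
```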
@@ -874,14 +894,26 @@ def create_ui():
                 image_intro=gr.HTML()
                 image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label_base = gr.Button(value="Name: ")
+                    artist_label_base = gr.Button(value="Artist: ")
+                    year_label_base = gr.Button(value="Year: ")
+                    material_label_base = gr.Button(value="Material: ")
+
+

             with gr.Tab("Click") as click_tab:
                 image_intro_click=gr.HTML()
                 image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label = gr.Button(value="Name: ")
+                    artist_label = gr.Button(value="Artist: ")
+                    year_label = gr.Button(value="Year: ")
+                    material_label = gr.Button(value="Material: ")
                 with gr.Row(scale=1.0):
                     focus_type = gr.Radio(
-                        choices=["
+                        choices=["CFV-D", "CFV-DA", "PFV-DA","PFV-DAI"],
                         value="Inside the Mark",
                         label="Focus Type",
                         interactive=True)
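The new Name/Artist/Year/Material widgets are gr.Button components used as read-only labels: upload_callback fills them simply by returning strings such as f"Name: {name}", which Gradio assigns to each button's value. A minimal, self-contained sketch of that pattern (a hypothetical demo, not the app's real wiring):

```python
import gradio as gr

def fake_upload(image):
    # Stand-in for upload_callback: plain strings returned here become the button labels.
    return "Name: Mona Lisa", "Artist: Leonardo da Vinci", "Year: c. 1503", "Material: Oil on poplar"

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil", interactive=True)
    with gr.Row():
        name_label = gr.Button(value="Name: ")
        artist_label = gr.Button(value="Artist: ")
        year_label = gr.Button(value="Year: ")
        material_label = gr.Button(value="Material: ")
    image_input.upload(fake_upload, [image_input],
                       [name_label, artist_label, year_label, material_label])

demo.launch()
```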
@@ -975,6 +1007,10 @@ def create_ui():
                 with gr.Row():
                     clear_button_text = gr.Button(value="Clear Text", interactive=True)
                     submit_button_text = gr.Button(value="Submit", interactive=True, variant="primary")
+                with gr.Row():
+                    export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
+                with gr.Row():
+                    chat_log_file = gr.File(label="Download Chat Log")

             with gr.Column(scale=0.5):
                 # TTS interface hidden initially
@@ -1189,14 +1225,14 @@ def create_ui():

         image_input_base.upload(upload_callback, [image_input_base, state, visual_chatgpt,openai_api_key],
                                 [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                                 image_embedding, original_size, input_size,
+                                 image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])

         image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
                            [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                            image_embedding, original_size, input_size,
+                            image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
         sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
                               [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                              image_embedding, original_size, input_size,
+                              image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
         chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state],
                           [chatbot, state, aux_state])
         chat_input.submit(lambda: "", None, chat_input)
@@ -1205,7 +1241,7 @@ def create_ui():
         submit_button_text.click(lambda: "", None, chat_input)
         example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
                              [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                              image_embedding, original_size, input_size,
+                              image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])

         example_image.change(clear_chat_memory, inputs=[visual_chatgpt])

@@ -1242,7 +1278,7 @@ def create_ui():
                 image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
                 out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
             ],
-            outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
+            outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
            show_progress=False, queue=True
        )

@@ -1273,6 +1309,13 @@ def create_ui():
            outputs=[chatbot, state, sketcher_input],
            show_progress=False, queue=True
        )
+
+        export_button.click(
+            export_chat_log,
+            inputs=[state],
+            outputs=[chat_log_file],
+            queue=True
+        )

