Niki Zhang committed
Commit 260b902 · verified · 1 Parent(s): c070b97

Update app.py


Function update: rename the click focus types ("Inside/Around/Outside the Mark") to CFV-D, CFV-DA, PFV-DA, and PFV-DAI; split the combined artwork_info HTML into separate Name/Artist/Year/Material button labels on the Base and Click tabs; make inference_click yield both a background-free and a background-preserving masked image; and add an export_chat_log helper wired to a new "Export Chat Log" button and a downloadable chat log file.

Files changed (1)
app.py +59 -16
app.py CHANGED
@@ -463,13 +463,13 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
-    artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
+    # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
     paragraph = get_image_gpt(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")
 
     state = [(None, 'Received new image, resize it to width {} and height {}: '.format(image_input.size[0], image_input.size[1]))]
 
     return state, state, image_input, click_state, image_input, image_input, image_input, image_embedding, \
-           original_size, input_size, artwork_info,artwork_info,paragraph
+           original_size, input_size, f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",paragraph
 
 
 
@@ -512,7 +512,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
     text = out['generated_captions']['raw_caption']
     input_mask = np.array(out['mask'].convert('P'))
-    image_input = mask_painter(np.array(image_input), input_mask)
+    image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
+    image_input_withbackground=mask_painter(np.array(image_input), input_mask)
 
     click_index_state = click_index
     input_mask_state = input_mask
@@ -531,7 +532,7 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     print(generated_caption)
     print("new crop save",new_crop_save_path)
 
-    yield state, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
+    yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path
 
 
 
@@ -545,11 +546,11 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     input_mask = input_mask_state
     input_points = input_points_state
     input_labels = input_labels_state
-    out = out_state
     focus_map = {
-        "Inside the Mark": 0,
-        "Around the Mark": 1,
-        "Outside the Mark": 2
+        "CFV-D":0,
+        "CFV-DA":1,
+        "PFV-DA":2,
+        "PFV-DAI":3
     }
 
     mapped_value = focus_map.get(focus_type, -1)
@@ -565,6 +566,7 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     prompt_list = [
         'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}, you have to select sentences from wiki caption that describe the surrounding objects that may be associated with the picture object. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.'
     ]
 
@@ -596,14 +598,14 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
                                                       input_points=input_points, input_labels=input_labels)
 
     if generated_caption:
-        state = state + [(None, f"RAW_Caption: {generated_caption}")]
+        # state = state + [(None, f"RAW_Caption: {generated_caption}")]
 
 
         if not args.disable_gpt and text_refiner:
            print("new crop save",new_crop_save_path)
            focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)
 
-           state = state + [(None, f"Wiki: {paragraph}")]
+           # state = state + [(None, f"Wiki: {paragraph}")]
           state = state + [(None, f"Focus_Caption: {focus_info}")]
           print("new_cap",focus_info)
           refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
@@ -765,6 +767,24 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""
 
+
+def export_chat_log(chat_state):
+    try:
+        if not chat_state:
+            return None
+        chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
+        print("export log...")
+        print("chat_log",chat_log)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
+            temp_file.write(chat_log.encode('utf-8'))
+            temp_file_path = temp_file.name
+        print(temp_file_path)
+        return temp_file_path
+    except Exception as e:
+        print(f"An error occurred while exporting the chat log: {e}")
+        return None
+
+
 def cap_everything(image_input, visual_chatgpt, text_refiner,input_language, input_audio, input_mic, use_mic, agree):
 
     model = build_caption_anything_with_models(
@@ -874,14 +894,26 @@ def create_ui():
                 image_intro=gr.HTML()
                 image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label_base = gr.Button(value="Name: ")
+                    artist_label_base = gr.Button(value="Artist: ")
+                    year_label_base = gr.Button(value="Year: ")
+                    material_label_base = gr.Button(value="Material: ")
+
+
 
             with gr.Tab("Click") as click_tab:
                 image_intro_click=gr.HTML()
                 image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label = gr.Button(value="Name: ")
+                    artist_label = gr.Button(value="Artist: ")
+                    year_label = gr.Button(value="Year: ")
+                    material_label = gr.Button(value="Material: ")
                 with gr.Row(scale=1.0):
                     focus_type = gr.Radio(
-                        choices=["Inside the Mark", "Around the Mark", "Outside the Mark"],
+                        choices=["CFV-D", "CFV-DA", "PFV-DA","PFV-DAI"],
                         value="Inside the Mark",
                         label="Focus Type",
                         interactive=True)
@@ -975,6 +1007,10 @@ def create_ui():
                 with gr.Row():
                     clear_button_text = gr.Button(value="Clear Text", interactive=True)
                     submit_button_text = gr.Button(value="Submit", interactive=True, variant="primary")
+                with gr.Row():
+                    export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
+                with gr.Row():
+                    chat_log_file = gr.File(label="Download Chat Log")
 
             with gr.Column(scale=0.5):
                 # TTS interface hidden initially
@@ -1189,14 +1225,14 @@ def create_ui():
 
     image_input_base.upload(upload_callback, [image_input_base, state, visual_chatgpt,openai_api_key],
                             [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                             image_embedding, original_size, input_size,image_intro,image_intro_click,paragraph])
+                             image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
 
     image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
                        [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                        image_embedding, original_size, input_size,image_intro,image_intro_click,paragraph])
+                        image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
     sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
                           [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                          image_embedding, original_size, input_size,image_intro,image_intro_click,paragraph])
+                          image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
     chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state],
                       [chatbot, state, aux_state])
     chat_input.submit(lambda: "", None, chat_input)
@@ -1205,7 +1241,7 @@ def create_ui():
     submit_button_text.click(lambda: "", None, chat_input)
     example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
                          [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                          image_embedding, original_size, input_size,image_intro,image_intro_click,paragraph])
+                          image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
 
     example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
 
@@ -1242,7 +1278,7 @@ def create_ui():
             image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
             out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
         ],
-        outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
+        outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
         show_progress=False, queue=True
     )
 
@@ -1273,6 +1309,13 @@ def create_ui():
         outputs=[chatbot, state, sketcher_input],
         show_progress=False, queue=True
     )
+
+    export_button.click(
+        export_chat_log,
+        inputs=[state],
+        outputs=[chat_log_file],
+        queue=True
+    )
 
 
 
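For quick review, here is a minimal standalone sketch of the new export_chat_log helper as added in this commit (debug prints omitted); sample_state below is a hypothetical chat state in the (user_message, bot_message) tuple format that the state component carries in app.py:

import tempfile

def export_chat_log(chat_state):
    # Mirrors the helper added above, minus its debug prints.
    try:
        if not chat_state:
            return None
        chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
            temp_file.write(chat_log.encode('utf-8'))
            temp_file_path = temp_file.name
        return temp_file_path
    except Exception as e:
        print(f"An error occurred while exporting the chat log: {e}")
        return None

# Hypothetical chat state; None placeholders serialize as the string "None".
sample_state = [
    (None, "Received new image, resize it to width 512 and height 512: "),
    ("What is in focus here?", "Focus_Caption: A seated figure in oil on canvas."),
]
print(export_chat_log(sample_state))  # prints a path such as /tmp/tmpXXXXXXXX.txt

Because delete=False is passed, the temporary file survives after the context manager closes it, which is what allows the gr.File component bound in export_button.click to serve it for download.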