Niki Zhang committed: Update app.py
Function update

app.py CHANGED
@@ -463,13 +463,13 @@ def upload_callback(image_input, state, visual_chatgpt=None, openai_api_key=None
     parsed_data = get_image_gpt(openai_api_key, new_image_path,"Please provide the name, artist, year of creation, and material used for this painting. Return the information in dictionary format without any newline characters. If any information is unavailable, return \"None\" for that field. Format as follows: { \"name\": \"Name of the painting\",\"artist\": \"Name of the artist\", \"year\": \"Year of creation\", \"material\": \"Material used in the painting\" }.")
     parsed_data = json.loads(parsed_data.replace("'", "\""))
     name, artist, year, material= parsed_data["name"],parsed_data["artist"],parsed_data["year"], parsed_data["material"]
-    artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
+    # artwork_info = f"<div>Painting: {name}<br>Artist name: {artist}<br>Year: {year}<br>Material: {material}</div>"
     paragraph = get_image_gpt(openai_api_key, new_image_path,f"What's going on in this picture? in {language}")

     state = [(None, 'Received new image, resize it to width {} and height {}: '.format(image_input.size[0], image_input.size[1]))]

     return state, state, image_input, click_state, image_input, image_input, image_input, image_embedding, \
-        original_size, input_size,
+        original_size, input_size, f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",f"Name: {name}", f"Artist: {artist}", f"Year: {year}", f"Material: {material}",paragraph


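upload_callback now returns nine extra values (the name/artist/year/material strings for each tab's label row, plus paragraph), so the return tuple has to stay aligned, value for value, with the outputs list that the upload events pass further down in this commit. A quick hypothetical sanity check of that alignment, with component names taken from the event wiring below:

```python
# Hypothetical alignment check: each returned value fills the output component at the
# same position. The names mirror this commit's return statement and upload() wiring.
returns = ["state", "state", "image_input", "click_state", "image_input", "image_input",
           "image_input", "image_embedding", "original_size", "input_size",
           "Name", "Artist", "Year", "Material",          # -> click-tab label buttons
           "Name", "Artist", "Year", "Material",          # -> base-tab label buttons
           "paragraph"]
outputs = ["chatbot", "state", "origin_image", "click_state", "image_input", "image_input_base",
           "sketcher_input", "image_embedding", "original_size", "input_size",
           "name_label", "artist_label", "year_label", "material_label",
           "name_label_base", "artist_label_base", "year_label_base", "material_label_base",
           "paragraph"]
assert len(returns) == len(outputs) == 19  # 19 values feed 19 components, positionally
```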
@@ -512,7 +512,8 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     update_click_state(click_state, out['generated_captions']['raw_caption'], click_mode)
     text = out['generated_captions']['raw_caption']
     input_mask = np.array(out['mask'].convert('P'))
-
+    image_input_nobackground = mask_painter(np.array(image_input), input_mask,background_alpha=0)
+    image_input_withbackground=mask_painter(np.array(image_input), input_mask)

     click_index_state = click_index
     input_mask_state = input_mask
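mask_painter is defined elsewhere in the repository and is not part of this diff, so its exact behaviour is not visible here. Going only by the new variable names, background_alpha appears to control how much of the unmasked area remains visible, with 0 keeping just the clicked object. A rough numpy illustration under that assumption, not the repository's implementation:

```python
import numpy as np

def mask_painter_sketch(image: np.ndarray, mask: np.ndarray,
                        background_alpha: float = 1.0) -> np.ndarray:
    """Illustrative stand-in only: keep the masked object untouched and scale the
    brightness of everything else by background_alpha (0 blacks the background out)."""
    out = image.astype(np.float32)
    background = mask == 0          # pixels outside the selected object
    out[background] *= background_alpha
    return out.astype(np.uint8)

# image_object_only = mask_painter_sketch(np.array(image_input), input_mask, background_alpha=0)
```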
@@ -531,7 +532,7 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
     print(generated_caption)
     print("new crop save",new_crop_save_path)

-    yield state, state, click_state,
+    yield state, state, click_state, image_input_nobackground, image_input_withbackground, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path


@@ -545,11 +546,11 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     input_mask = input_mask_state
     input_points = input_points_state
     input_labels = input_labels_state
-    out = out_state
     focus_map = {
-        "
-        "
-        "
+        "CFV-D":0,
+        "CFV-DA":1,
+        "PFV-DA":2,
+        "PFV-DAI":3
     }

     mapped_value = focus_map.get(focus_type, -1)
@@ -565,6 +566,7 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
     prompt_list = [
         'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}, you have to select sentences from wiki caption that describe the surrounding objects that may be associated with the picture object. Around {length} words of {sentiment} sentiment in {language}.',
+        'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.',
         'Wiki_caption: {Wiki_caption}. You have to choose sentences from the wiki caption that describe unrelated objects to the image. Around {length} words of {sentiment} sentiment in {language}.'
     ]

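The remapped focus_map keys match the new Radio choices added later in this commit, and mapped_value indexes into prompt_list. The code that actually formats the chosen template is outside this hunk, so the fill-in step below is an assumption for illustration:

```python
# Hypothetical end-to-end selection of a focus prompt (template text copied from the diff).
focus_map = {"CFV-D": 0, "CFV-DA": 1, "PFV-DA": 2, "PFV-DAI": 3}
prompt_list = [
    'Wiki_caption: {Wiki_caption}, you have to generate a caption according to the image and wiki caption. Around {length} words of {sentiment} sentiment in {language}.',
    # ... the remaining three templates shown in the hunk above ...
]

focus_type = "CFV-D"
mapped_value = focus_map.get(focus_type, -1)   # -1 when the radio value is unknown
if mapped_value != -1:
    prompt = prompt_list[mapped_value].format(
        Wiki_caption="A cypress tree under a swirling night sky.",
        length=40, sentiment="positive", language="English")
    print(prompt)
```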
@@ -596,14 +598,14 @@ def submit_caption(image_input, state, generated_caption, text_refiner, visual_c
                                 input_points=input_points, input_labels=input_labels)

     if generated_caption:
-        state = state + [(None, f"RAW_Caption: {generated_caption}")]
+        # state = state + [(None, f"RAW_Caption: {generated_caption}")]


     if not args.disable_gpt and text_refiner:
         print("new crop save",new_crop_save_path)
         focus_info=get_image_gpt(openai_api_key,new_crop_save_path,prompt)

-        state = state + [(None, f"Wiki: {paragraph}")]
+        # state = state + [(None, f"Wiki: {paragraph}")]
         state = state + [(None, f"Focus_Caption: {focus_info}")]
         print("new_cap",focus_info)
         refined_image_input = create_bubble_frame(np.array(origin_image_input), focus_info, click_index, input_mask,
@@ -765,6 +767,24 @@ def clear_chat_memory(visual_chatgpt, keep_global=False):
         visual_chatgpt.current_image = None
         visual_chatgpt.global_prompt = ""

+
+def export_chat_log(chat_state):
+    try:
+        if not chat_state:
+            return None
+        chat_log = "\n".join(f"{entry[0]}\n{entry[1]}" for entry in chat_state if entry)
+        print("export log...")
+        print("chat_log",chat_log)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as temp_file:
+            temp_file.write(chat_log.encode('utf-8'))
+            temp_file_path = temp_file.name
+        print(temp_file_path)
+        return temp_file_path
+    except Exception as e:
+        print(f"An error occurred while exporting the chat log: {e}")
+        return None
+
+
 def cap_everything(image_input, visual_chatgpt, text_refiner,input_language, input_audio, input_mic, use_mic, agree):

     model = build_caption_anything_with_models(
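Assuming the new helper above is in scope (it relies on tempfile being imported at the top of app.py), a typical call with a Gradio-style chat state looks like this; the returned path is what the gr.File download widget added later in this commit receives:

```python
chat_state = [
    (None, "Received new image, resize it to width 512 and height 512: "),
    ("What is shown here?", "Focus_Caption: A cypress tree under a swirling sky."),
]
log_path = export_chat_log(chat_state)   # writes the joined entries to a .txt temp file
print(log_path)                          # e.g. a path like /tmp/tmpab12cd34.txt
```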
@@ -874,14 +894,26 @@ def create_ui():
                 image_intro=gr.HTML()
                 image_input_base = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label_base = gr.Button(value="Name: ")
+                    artist_label_base = gr.Button(value="Artist: ")
+                    year_label_base = gr.Button(value="Year: ")
+                    material_label_base = gr.Button(value="Material: ")
+
+

             with gr.Tab("Click") as click_tab:
                 image_intro_click=gr.HTML()
                 image_input = gr.Image(type="pil", interactive=True, elem_id="image_upload")
                 example_image = gr.Image(type="pil", interactive=False, visible=False)
+                with gr.Row():
+                    name_label = gr.Button(value="Name: ")
+                    artist_label = gr.Button(value="Artist: ")
+                    year_label = gr.Button(value="Year: ")
+                    material_label = gr.Button(value="Material: ")
                 with gr.Row(scale=1.0):
                     focus_type = gr.Radio(
-                        choices=["
+                        choices=["CFV-D", "CFV-DA", "PFV-DA","PFV-DAI"],
                         value="Inside the Mark",
                         label="Focus Type",
                         interactive=True)
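The new Name/Artist/Year/Material widgets are gr.Button components used as read-only labels: upload_callback fills them simply by returning strings such as f"Name: {name}", which Gradio assigns to each button's value. A minimal, self-contained sketch of that pattern (a hypothetical demo, not the app's real wiring):

```python
import gradio as gr

def fake_upload(image):
    # Stand-in for upload_callback: plain strings returned here become the button labels.
    return "Name: Mona Lisa", "Artist: Leonardo da Vinci", "Year: c. 1503", "Material: Oil on poplar"

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil", interactive=True)
    with gr.Row():
        name_label = gr.Button(value="Name: ")
        artist_label = gr.Button(value="Artist: ")
        year_label = gr.Button(value="Year: ")
        material_label = gr.Button(value="Material: ")
    image_input.upload(fake_upload, [image_input],
                       [name_label, artist_label, year_label, material_label])

demo.launch()
```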
@@ -975,6 +1007,10 @@ def create_ui():
                 with gr.Row():
                     clear_button_text = gr.Button(value="Clear Text", interactive=True)
                     submit_button_text = gr.Button(value="Submit", interactive=True, variant="primary")
+                with gr.Row():
+                    export_button = gr.Button(value="Export Chat Log", interactive=True, variant="primary")
+                with gr.Row():
+                    chat_log_file = gr.File(label="Download Chat Log")

             with gr.Column(scale=0.5):
                 # TTS interface hidden initially
@@ -1189,14 +1225,14 @@ def create_ui():

         image_input_base.upload(upload_callback, [image_input_base, state, visual_chatgpt,openai_api_key],
                                 [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                                 image_embedding, original_size, input_size,
+                                 image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])

         image_input.upload(upload_callback, [image_input, state, visual_chatgpt, openai_api_key],
                            [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                            image_embedding, original_size, input_size,
+                            image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
         sketcher_input.upload(upload_callback, [sketcher_input, state, visual_chatgpt, openai_api_key],
                               [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                              image_embedding, original_size, input_size,
+                              image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])
         chat_input.submit(chat_input_callback, [visual_chatgpt, chat_input, click_state, state, aux_state],
                           [chatbot, state, aux_state])
         chat_input.submit(lambda: "", None, chat_input)
@@ -1205,7 +1241,7 @@ def create_ui():
         submit_button_text.click(lambda: "", None, chat_input)
         example_image.change(upload_callback, [example_image, state, visual_chatgpt, openai_api_key],
                              [chatbot, state, origin_image, click_state, image_input, image_input_base, sketcher_input,
-                              image_embedding, original_size, input_size,
+                              image_embedding, original_size, input_size,name_label,artist_label,year_label,material_label,name_label_base, artist_label_base, year_label_base, material_label_base,paragraph])

         example_image.change(clear_chat_memory, inputs=[visual_chatgpt])

@@ -1242,7 +1278,7 @@ def create_ui():
                 image_embedding, state, click_state, original_size, input_size, text_refiner, visual_chatgpt,
                 out_state, click_index_state, input_mask_state, input_points_state, input_labels_state
             ],
-            outputs=[chatbot, state, click_state, image_input, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
+            outputs=[chatbot, state, click_state, image_input, input_image, generated_caption, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path],
            show_progress=False, queue=True
        )

@@ -1273,6 +1309,13 @@ def create_ui():
            outputs=[chatbot, state, sketcher_input],
            show_progress=False, queue=True
        )
+
+        export_button.click(
+            export_chat_log,
+            inputs=[state],
+            outputs=[chat_log_file],
+            queue=True
+        )

