Niki Zhang committed: Update app.py

app.py CHANGED
@@ -347,74 +347,74 @@ def extract_features_siglip(image):
     return image_features
 
 @spaces.GPU
-def infer(crop_image_path,full_image_path,state,language,
+def infer(crop_image_path,full_image_path,state,language,task_type=None):
     print("task type",task_type)
     gallery_output = []
-    [the 65 removed lines (353–417) are blank in the page capture and not recoverable]
+
+    if task_type=="task 1":
+        gallery_output.append("recomendation_pic/1.8.jpg")
+        gallery_output.append("recomendation_pic/1.9.jpg")
+        input_image = Image.open(full_image_path).convert("RGB")
+        input_features = extract_features_siglip(input_image.convert("RGB"))
+        input_features = input_features.detach().cpu().numpy()
+        input_features = np.float32(input_features)
+        faiss.normalize_L2(input_features)
+        distances, indices = index.search(input_features, 2)
+        for i,v in enumerate(indices[0]):
+            sim = -distances[0][i]
+            image_url = df.iloc[v]["Link"]
+            img_retrieved = read_image_from_url(image_url)
+            gallery_output.append(img_retrieved)
+        if language=="English":
+            msg="🖼️ Please refer to the section below to see the recommended results."
+        else:
+            msg="🖼️ 请到下方查看推荐结果。"
+        state+=[(None,msg)]
+
+        return gallery_output,state,state
+    elif task_type=="task 2":
+        gallery_output.append("recomendation_pic/2.8.jpg")
+        gallery_output.append("recomendation_pic/2.9.png")
+        input_image = Image.open(full_image_path).convert("RGB")
+        input_features = extract_features_siglip(input_image.convert("RGB"))
+        input_features = input_features.detach().cpu().numpy()
+        input_features = np.float32(input_features)
+        faiss.normalize_L2(input_features)
+        distances, indices = index.search(input_features, 2)
+        for i,v in enumerate(indices[0]):
+            sim = -distances[0][i]
+            image_url = df.iloc[v]["Link"]
+            img_retrieved = read_image_from_url(image_url)
+            gallery_output.append(img_retrieved)
+        if language=="English":
+            msg="🖼️ Please refer to the section below to see the recommended results."
+        else:
+            msg="🖼️ 请到下方查看推荐结果。"
+        state+=[(None,msg)]
+
+        return gallery_output,state,state
+
+    elif task_type=="task 3":
+        gallery_output.append("recomendation_pic/3.8.png")
+        gallery_output.append("recomendation_pic/3.9.png")
+        input_image = Image.open(full_image_path).convert("RGB")
+        input_features = extract_features_siglip(input_image.convert("RGB"))
+        input_features = input_features.detach().cpu().numpy()
+        input_features = np.float32(input_features)
+        faiss.normalize_L2(input_features)
+        distances, indices = index.search(input_features, 2)
+        for i,v in enumerate(indices[0]):
+            sim = -distances[0][i]
+            image_url = df.iloc[v]["Link"]
+            img_retrieved = read_image_from_url(image_url)
+            gallery_output.append(img_retrieved)
+        if language=="English":
+            msg="🖼️ Please refer to the section below to see the recommended results."
+        else:
+            msg="🖼️ 请到下方查看推荐结果。"
+        state+=[(None,msg)]
+
+        return gallery_output,state,state
 
     elif crop_image_path:
         input_image = Image.open(crop_image_path).convert("RGB")
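The retrieval added to `infer` is plain cosine-similarity search: `faiss.normalize_L2` unit-normalises the vectors, so FAISS inner-product search ranks by cosine similarity. A minimal self-contained sketch of the same pattern (the 512-dim width and random vectors are stand-ins; the app builds its real `index` and `df` elsewhere):

import faiss
import numpy as np

dim = 512                                    # illustrative embedding width
db = np.float32(np.random.randn(1000, dim))  # stand-in for the app's feature matrix
faiss.normalize_L2(db)                       # unit-normalise rows in place
index = faiss.IndexFlatIP(dim)               # inner product == cosine on unit vectors
index.add(db)

query = np.float32(np.random.randn(1, dim))  # stand-in for extract_features_siglip output
faiss.normalize_L2(query)
distances, indices = index.search(query, 2)  # top-2 neighbours, as in the diff
for i, v in enumerate(indices[0]):
    print(v, distances[0][i])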
@@ -1090,7 +1090,7 @@ def inference_click(image_input, point_prompt, click_mode, enable_wiki, language
         Image.open(out["crop_save_path"]).save(new_crop_save_path)
         print("new crop save",new_crop_save_path)
 
-    [the removed line 1093 is blank in the page capture]
+    return state, state, click_state, image_input_nobackground, click_index_state, input_mask_state, input_points_state, input_labels_state, out_state,new_crop_save_path,image_input_nobackground
 
 
 query_focus_en = [
@@ -1646,7 +1646,7 @@ async def texttospeech(text, language,gender='female'):
         return None
 
 # give the reason of recommendation
-async def associate(image_path,new_crop,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,evt: gr.SelectData):
+async def associate(image_path,new_crop,openai_api_key,language,autoplay,length,log_state,sort_score,narritive,state,evt: gr.SelectData):
     persona=naritive_mapping[narritive]
     rec_path=evt._data['value']['image']['path']
     index=evt.index
@@ -1658,7 +1658,7 @@ async def associate(image_path,new_crop,openai_api_key,language,autoplay,length,
     image_paths=[image_path,rec_path]
     result=get_gpt_response(openai_api_key, image_paths, prompt)
     print("recommend result",result)
-    [the removed line 1661 is blank in the page capture]
+    state += [(None, f"{result}")]
     log_state = log_state + [(narritive, None)]
     log_state = log_state + [(f"image sort ranking {sort_score}", None)]
     log_state = log_state + [(None, f"{result}")]
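The `state += [(None, f"{result}")]` line follows Gradio's tuple-style chatbot history: each entry is a `(user, bot)` pair, and `None` leaves that side empty. It is also why handlers in this diff return the same list twice, once for the visible `gr.Chatbot` and once for the `gr.State` that persists it. A hedged sketch of the idiom:

import gradio as gr

def recommend(state):
    # None on the user side renders a bot-only message
    state = state + [(None, "🖼️ Please refer to the section below to see the recommended results.")]
    return state, state   # first copy drawn by the Chatbot, second kept in gr.State

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    state = gr.State([])
    gr.Button("Recommend").click(recommend, state, [chatbot, state])

demo.launch()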
@@ -1668,11 +1668,11 @@ async def associate(image_path,new_crop,openai_api_key,language,autoplay,length,
     audio_output=None
     if autoplay:
         audio_output = await texttospeech(read_info, language)
-    return
+    return state,state,audio_output,log_state,index,gr.update(value=[])
 
-def change_naritive(session_type,image_input,
+def change_naritive(session_type,image_input, state, click_state, paragraph, origin_image,narritive,task_instruct,gallery_output,reco_reasons,language="English"):
     if session_type=="Session 1":
-        return None, [], [], [[], [], []], "", None, []
+        return None, [], [], [[], [], []], "", None, None, [], [],[]
     else:
         if language=="English":
             if narritive=="Third-person" :
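`associate` stays an `async def` because it awaits `texttospeech`; Gradio accepts coroutine handlers directly. Its new trailing output, `gr.update(value=[])`, resets a component in place (per the wiring below, the last output slot is `recommend_score`). A small sketch under those assumptions:

import asyncio
import gradio as gr

async def answer(history):
    await asyncio.sleep(0.1)              # stands in for the awaited texttospeech() call
    history = history + [(None, "done")]
    return history, gr.update(value=[])   # second output clears its target component

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    gallery = gr.Gallery()
    gr.Button("Go").click(answer, chat, [chat, gallery])

demo.launch()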
@@ -1720,7 +1720,7 @@ def change_naritive(session_type,image_input, chatbot, state, click_state, parag
         ]
 
 
-    return image_input, state, state, click_state, paragraph, origin_image
+    return image_input, state, state, click_state, paragraph, origin_image,task_instruct,gallery_output,reco_reasons,reco_reasons
 
 
 def print_like_dislike(x: gr.LikeData,state,log_state):
@@ -1766,7 +1766,7 @@ def create_ui():
     examples = [
         ["test_images/1.The Ambassadors.jpg","test_images/task1.jpg","task 1"],
         ["test_images/2.Football Players.jpg","test_images/task2.jpg","task 2"],
-        ["test_images/3.
+        ["test_images/3-square.jpg","test_images/task3.jpg","task 3"],
         # ["test_images/test4.jpg"],
         # ["test_images/test5.jpg"],
         # ["test_images/Picture5.png"],
@@ -1810,6 +1810,7 @@ def create_ui():
         # store the whole image path
         image_path=gr.State('')
         pic_index=gr.State(None)
+        recomended_state=gr.State([])
 
 
         with gr.Row():
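`recomended_state=gr.State([])` joins the other `gr.State` holders: invisible, per-session values that survive between events, which is how the recommendation chat can outlive a single click. A minimal sketch:

import gradio as gr

def remember(item, seen):
    seen = seen + [item]          # gr.State can hold any Python value
    return f"{len(seen)} items so far", seen

with gr.Blocks() as demo:
    seen = gr.State([])           # independent copy per browser session
    box = gr.Textbox()
    out = gr.Textbox()
    box.submit(remember, [box, seen], [out, seen])

demo.launch()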
@@ -1821,8 +1822,7 @@ def create_ui():
             )
         with gr.Row():
             with gr.Column(scale=1,min_width=50,visible=False) as instruct:
-                task_instuction=gr.Image(type="pil", interactive=True, elem_classes="task_instruct",height=650,label=None)
-
+                task_instuction=gr.Image(type="pil", interactive=True, elem_classes="task_instruct",height=650,label=None)
             with gr.Column(scale=6):
                 with gr.Column(visible=False) as modules_not_need_gpt:
 
@@ -1941,6 +1941,7 @@ def create_ui():
             with gr.Column(scale=4):
                 with gr.Column(visible=True) as module_key_input:
                     openai_api_key = gr.Textbox(
+                        value="sk-proj-bxHhgjZV8TVgd1IupZrUT3BlbkFJvrthq6zIxpZVk3vwsvJ9",
                         placeholder="Input openAI API key",
                         show_label=False,
                         label="OpenAI API Key",
@@ -2206,14 +2207,14 @@ def create_ui():
         # )
         recommend_btn.click(
             fn=infer,
-            inputs=[new_crop_save_path,image_path,state,language,
+            inputs=[new_crop_save_path,image_path,state,language,task_type],
             outputs=[gallery_result,chatbot,state]
         )
 
         gallery_result.select(
             associate,
-            inputs=[image_path,new_crop_save_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive],
-            outputs=[recommend_bot,output_audio,log_state,pic_index,recommend_score],
+            inputs=[image_path,new_crop_save_path,openai_api_key,language,auto_play,length,log_state,sort_rec,naritive,recomended_state],
+            outputs=[recommend_bot,recomended_state,output_audio,log_state,pic_index,recommend_score],
 
 
         )
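Both registrations follow Gradio's positional contract: values of the `inputs` components arrive as the handler's arguments (with `evt: gr.SelectData` injected automatically for `.select`), and the returned tuple is spread over `outputs` in order, which is why adding `state`/`recomended_state` to `associate` meant extending both lists in lockstep. A sketch of gallery selection (file paths are illustrative, taken from the diff):

import gradio as gr

def on_pick(note, evt: gr.SelectData):
    # evt.index / evt.value identify the clicked gallery item
    return f"{note}: picked item {evt.index}"

with gr.Blocks() as demo:
    gallery = gr.Gallery(value=["recomendation_pic/1.8.jpg", "recomendation_pic/1.9.jpg"])
    note = gr.Textbox(value="selection")
    out = gr.Textbox()
    gallery.select(on_pick, note, out)

demo.launch()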
@@ -2434,11 +2435,18 @@ def create_ui():
 
         # cap_everything_button.click(cap_everything, [paragraph, visual_chatgpt, language,auto_play],
         #                   [paragraph_output,output_audio])
-
+        def reset_and_add(origin_image):
+            new_prompt = "Positive"
+            new_add_icon = "assets/icons/plus-square-blue.png"
+            new_add_css = "tools_button_clicked"
+            new_minus_icon = "assets/icons/minus-square.png"
+            new_minus_css= "tools_button"
+            return [[],[],[]],origin_image, new_prompt, gr.update(icon=new_add_icon,elem_classes=new_add_css), gr.update(icon=new_minus_icon,elem_classes=new_minus_css)
+
         clear_button_click.click(
-            [the removed line 2439 is blank in the page capture]
+            reset_and_add,
             [origin_image],
-            [click_state, image_input],
+            [click_state, image_input,point_prompt,add_button,minus_button],
             queue=False,
             show_progress=False
         )
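`reset_and_add` restores the click tools by returning `gr.update(...)` objects, which patch properties of the target components in place; assuming `add_button` and `minus_button` are `gr.Button`s, the updates swap their `icon` and `elem_classes`. A hedged sketch (icon paths copied from the diff, so they must exist on disk for the icons to render):

import gradio as gr

def reset_tools():
    # re-style the add/minus buttons back to their default look
    return (gr.update(icon="assets/icons/plus-square-blue.png", elem_classes="tools_button_clicked"),
            gr.update(icon="assets/icons/minus-square.png", elem_classes="tools_button"))

with gr.Blocks() as demo:
    add_button = gr.Button("Positive", icon="assets/icons/plus-square-blue.png")
    minus_button = gr.Button("Negative", icon="assets/icons/minus-square.png")
    gr.Button("Clear").click(reset_tools, None, [add_button, minus_button])

demo.launch()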
@@ -2525,10 +2533,10 @@ def create_ui():
                                   paragraph,artist,gender,image_path, log_state,history_log,output_audio])
 
         example_image.change(clear_chat_memory, inputs=[visual_chatgpt])
-        example_image.change(
-            lambda:([],[]),
-            [],
-            [gallery_result,recommend_bot])
+        # example_image.change(
+        #     lambda:([],[]),
+        #     [],
+        #     [gallery_result,recommend_bot])
 
         # def on_click_tab_selected():
         #     if gpt_state ==1:
@@ -2672,20 +2680,21 @@ def create_ui():
 
         naritive.change(
             change_naritive,
-            [session_type, image_input,
-            [the removed line 2676 is blank in the page capture]
+            [session_type, image_input, state, click_state, paragraph, origin_image,naritive,
+             task_instuction,gallery_result,recomended_state,language],
+            [image_input, chatbot, state, click_state, paragraph, origin_image,task_instuction,gallery_result,recomended_state,recommend_bot],
             queue=False,
             show_progress=False
 
         )
         def session_change():
            instruction=Image.open('test_images/task4.jpg')
-            return None, [], [], [[], [], []], "", None, [],[],instruction
+            return None, [], [], [[], [], []], "", None, [],[],instruction,"task 4"
 
         session_type.change(
             session_change,
             [],
-            [image_input, chatbot, state, click_state, paragraph, origin_image,history_log,log_state,task_instuction]
+            [image_input, chatbot, state, click_state, paragraph, origin_image,history_log,log_state,task_instuction,task_type]
         )
 
         # upvote_btn.click(
|