import spaces
import gradio as gr
import subprocess
from PIL import Image,ImageOps,ImageDraw,ImageFilter
import json
import os
import time
import mp_box
from mp_utils import get_pixel_cordinate_list,extract_landmark,get_pixel_cordinate,get_pixel_xyz
from glibvision.draw_utils import points_to_box,box_to_xy,plus_point
from glibvision.cv2_utils import plot_points,create_color_image,pil_to_bgr_image,set_plot_text,copy_image
from gradio_utils import save_image,save_buffer,clear_old_files ,read_file
import cv2
from cv2_pose_estimate import estimate_head_pose,draw_head_pose
import math
import numpy as np
from numpy.typing import NDArray
from scipy.spatial.transform import Rotation as R
'''
Head pose estimation demo: MediaPipe face landmarks provide the 2D/3D points,
and OpenCV solvePnP (plus optional refinement) estimates the head pose.
'''
set_plot_text(False,0.5,(200,200,200))
depth_ratio = 1.0
model_cordinates = [
    (0.0, 0.0, 0.0),                        # nose tip
    (0.0, 344.0, -40.0 * depth_ratio),      # chin
    #(0.0, -160.0, -50.0),                  # center of eye
    # INNER
    (-110.0, -215.0, -60.0 * depth_ratio),  # inner left eye corner
    (110.0, -215.0, -60.0 * depth_ratio),   # inner right eye corner
    (-300.0, -250.0, -90.0 * depth_ratio),  # left eye left corner
    (300.0, -250.0, -90.0 * depth_ratio),   # right eye right corner
    (-125.0, 180.0, -70.0 * depth_ratio),   # left mouth corner
    (125.0, 180.0, -70.0 * depth_ratio),    # right mouth corner
]
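# Illustrative sanity check (not called by the app): project the model points
# with a known pose via cv2.projectPoints, then recover that pose with
# cv2.solvePnP. The focal length, image center and pose below are arbitrary
# values assumed only for this sketch.
def _sanity_check_pnp():
    object_points = np.array(model_cordinates, dtype="double")
    true_rvec = np.array([[0.1], [0.2], [0.0]])     # small known rotation
    true_tvec = np.array([[0.0], [0.0], [2000.0]])  # model placed in front of the camera
    camera_matrix = np.array([[1000.0, 0.0, 512.0],
                              [0.0, 1000.0, 512.0],
                              [0.0, 0.0, 1.0]])
    dist_coeffs = np.zeros((4, 1))
    projected, _ = cv2.projectPoints(object_points, true_rvec, true_tvec, camera_matrix, dist_coeffs)
    ok, rvec, tvec = cv2.solvePnP(object_points, projected, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_EPNP)
    # when ok is True, rvec/tvec should be close to true_rvec/true_tvec
    return ok, rvec, tvec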
def fit_cordinates(cordinates, center_x=512, center_y=512, base_distance=344):
    # scale so that the nose-to-chin distance equals base_distance
    ratio = base_distance / (cordinates[1][1])
    fitted_cordinates = []
    for cordinate in cordinates:
        fitted_cordinate = [
            cordinate[0] * ratio + center_x,
            cordinate[1] * ratio + center_y,
            cordinate[2] * ratio,
        ]
        fitted_cordinates.append(fitted_cordinate)
    return fitted_cordinates
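# Example (illustrative): scale the model so the nose-to-chin distance becomes
# 200 px around an assumed center of (400, 300):
#   fitted = fit_cordinates(model_cordinates, center_x=400, center_y=300, base_distance=200)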
def plot_model(cv2_image=None, center_x=512, center_y=512, base_distance=344):
    if cv2_image is None:
        #TODO add arg for the canvas size
        cv2_image = create_color_image(np.zeros((1024, 1024, 3), dtype=np.uint8))
    fitted_cordinates = fit_cordinates(model_cordinates, center_x, center_y, base_distance)
    # plot nose tip, chin, then eye and mouth corners as left/right pairs
    point_groups = [
        [fitted_cordinates[0]],                        # nose tip
        [fitted_cordinates[1]],                        # chin
        [fitted_cordinates[2], fitted_cordinates[4]],  # left eye inner/outer corners
        [fitted_cordinates[3], fitted_cordinates[5]],  # right eye inner/outer corners
        [fitted_cordinates[6], fitted_cordinates[7]],  # mouth corners
    ]
    for points in point_groups:
        plot_points(cv2_image, points, False, 6, (0, 0, 255), 3, (255, 0, 0))
    return cv2_image
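# Example (illustrative): render the default model on a blank 1024x1024 canvas:
#   preview = plot_model()  # returns a BGR image with the model points plotted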
def set_model_cordinates(cordinates):
global model_cordinates
model_cordinates = cordinates
def process_images(image,base_image,
camera_fov,double_check_offset_center,
draw_base_model,fit_base_model,
first_pnp,second_refine,final_iterative,debug_process,draw_mediapipe_mesh,draw_mediapipe_result,z_multiply=0.8,
progress=gr.Progress(track_tqdm=True)):
clear_old_files()
    image_indices = [4, 199,   #6, # center of eye (disabled)
                     133, 362, # inner eye
                     33, 263,  # outer eye
                     61, 291]  # mouth
chin = 344
global model_cordinates
""" normalize ?
model_cordinates =[
[pt[0]/chin,pt[1]/chin,pt[2]/chin] for pt in model_cordinates
]
"""
    def landmarks_to_model_cordinates(face_landmarks, indices, w, h):
        cordinates = []
        # scale MediaPipe's normalized z into pixel-like units
        z_depth = w if w < h else h
        z_depth *= z_multiply
        for index in indices:
            xyz = get_pixel_xyz(face_landmarks, index, w, h)
            #print(xyz, xyz[2]*z_multiply) #TODO choose?
            cordinates.append([
                xyz[0], xyz[1], xyz[2] * z_depth,
            ])
        return cordinates
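    # Note on z scaling: MediaPipe landmark x/y are normalized by image
    # width/height, and z is a relative depth on roughly the same scale as x.
    # Multiplying z by min(w, h) * z_multiply converts it to pixel-like units;
    # z_multiply is a heuristic tuned via the UI slider, not a calibrated value.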
    if image is None:
raise gr.Error("Need Image")
cv2_image = pil_to_bgr_image(image)
size = cv2_image.shape
center: tuple[float, float] = (size[1] / 2, size[0] / 2)
    if base_image is not None: # additional base image
base_image_indices = [
6,197,195,5,4,#nose center
122,196, 3, 51, 45,
351,419,248,281,275,
122,245,244,243,133, #eyes
351,465,464,463,362 #eyes
]
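        # These indices follow the MediaPipe Face Mesh canonical topology:
        # a line down the nose bridge to the tip, contours on either side of
        # the nose, and the inner-eye regions; a heuristic choice for building
        # a denser model from the base image.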
# TODO check same?
cv2_base_image = pil_to_bgr_image(base_image)
mp_image,face_landmarker_result = extract_landmark(cv2_base_image,"face_landmarker.task",0,0,True)
h,w = cv2_base_image.shape[:2]
image_indices = base_image_indices
        set_model_cordinates(landmarks_to_model_cordinates(face_landmarker_result.face_landmarks, image_indices, w, h))
print(image_indices)
def calculate_distance(xy, xy2):
return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
mp_image,face_landmarker_result = extract_landmark(cv2_image,"face_landmarker.task",0,0,True)
im = mp_image.numpy_view()
h,w = im.shape[:2]
first_landmarker_result = None
if double_check_offset_center:
root_cordinate = get_pixel_cordinate(face_landmarker_result.face_landmarks,image_indices[0],w,h)#nose tip
diff_center_x = center[0] - root_cordinate[0]
diff_center_y = center[1] - root_cordinate[1]
base = np.zeros_like(cv2_image)
copy_image(base,cv2_image,diff_center_x,diff_center_y)
first_landmarker_result = face_landmarker_result
mp_image,face_landmarker_result = extract_landmark(base,"face_landmarker.task",0,0,True)
im = mp_image.numpy_view()
else:
diff_center_x=0
diff_center_y=0
#return base,"",""
cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,image_indices,w,h)
if draw_mediapipe_mesh:
image = mp_box.draw_landmarks_on_image(face_landmarker_result,image)
cv2_image = pil_to_bgr_image(image)
chin_distance = calculate_distance(cordinates[0],cordinates[1])
    # Tried solving PnP from pre-fitted model coordinates, but it did not seem
    # to work, so both settings currently use the unfitted model points.
    #fitted_cordinates = fit_cordinates(model_cordinates, cordinates[0][0], cordinates[0][1], chin_distance)
    #model_points: NDArray = np.array(fitted_cordinates, dtype="double")
    model_points: NDArray = np.array(model_cordinates, dtype="double")
focal_length: float = calculate_distance(cordinates[0],cordinates[1])
focal_length = focal_length*camera_fov
#image_size = size[0] #TODO
#f = (image_size / 2) / np.tan(np.deg2rad(camera_fov / 2))
#focal_length = f
#print(f"fov ={camera_fov} size = {image_size} focal_length = {focal_length}")
camera_matrix: NDArray = np.array([
[focal_length, 0, center[0]],
[0, focal_length, center[1]],
[0, 0, 1]
], dtype="double")
dist_coeffs: NDArray = np.zeros((4, 1))
    # offsetting the center usually improves the result
image_points: NDArray = np.array(cordinates, dtype="double")
    def print_euler(rotation_vector, label=""):
        # despite the name, this returns a formatted Euler-angle label string
        order = "yxz"
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        r = R.from_matrix(rotation_matrix)
        euler_angles = r.as_euler(order, degrees=True)
        label = f"{label} Euler Angles {order} (degrees): {euler_angles}"
        return label
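    # Axis convention note: OpenCV camera coordinates have x right, y down,
    # z forward. SciPy's lowercase "yxz" order means extrinsic rotations, so
    # the three angles read roughly as yaw (y), pitch (x) and roll (z).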
    rotation_vector = None
    translation_vector = None
    # set by face_landmarker_result_to_angle_label below (via nonlocal)
    rotation_matrix = None
    scaled_translation_vector = None
    im_with_pose = cv2_image
    result_label = None
    def face_landmarker_result_to_angle_label(face_landmarker_result, order="yxz"):
        nonlocal im_with_pose, rotation_matrix, scaled_translation_vector
        if len(face_landmarker_result.facial_transformation_matrixes) > 0:
            transformation_matrix = face_landmarker_result.facial_transformation_matrixes[0]
            rotation_matrix, translation_vector = transformation_matrix[:3, :3], transformation_matrix[:3, 3]
            #TODO change base-size
            scaled_translation_vector = (translation_vector[0]*1024, translation_vector[1]*1024, translation_vector[2]*1024)
            #scaled_translation_vector = (-512,-512,-1024)
            if draw_mediapipe_result:
                im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_matrix, scaled_translation_vector, camera_matrix, dist_coeffs, 32, -diff_center_x, -diff_center_y)
            #print("mediapipe",scaled_translation_vector)
            r = R.from_matrix(rotation_matrix)
            euler_angles = r.as_euler(order, degrees=True)
            return f"MediaPipe Euler Angles {order} (degrees): {euler_angles}"
        return ""
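    # facial_transformation_matrixes[0] is MediaPipe's 4x4 pose of the
    # canonical face model in camera space: the top-left 3x3 block is the
    # rotation, the last column the translation. The 1024 scale applied to
    # the translation above is this app's drawing heuristic, not a value
    # defined by MediaPipe.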
    if first_landmarker_result is not None:
mediapipe_first_text = face_landmarker_result_to_angle_label(first_landmarker_result)
else:
mediapipe_first_text = ""
mediapipe_second_text = face_landmarker_result_to_angle_label(face_landmarker_result)
if first_pnp!="None":
if first_pnp == "EPNP":
flags = cv2.SOLVEPNP_EPNP
elif first_pnp == "ITERATIVE":
flags = cv2.SOLVEPNP_ITERATIVE
elif first_pnp == "IPPE":
flags = cv2.SOLVEPNP_IPPE
else:
flags = cv2.SOLVEPNP_SQPNP
if first_pnp == "Mediapipe":
rotation_vector, _ = cv2.Rodrigues(rotation_matrix)
translation_vector = scaled_translation_vector
else:
translation_vector = None
#translation_vector = np.array([cordinates[0][0],cordinates[0][1],focal_length],dtype="double")
#translation_vector = scaled_translation_vector
#print("initial",translation_vector,)
rotation_vector, translation_vector = estimate_head_pose(cv2_image, model_points,image_points, camera_matrix, dist_coeffs,flags,None,translation_vector)
#print(translation_vector)
im_with_pose = cv2_image
result_label = print_euler(rotation_vector,first_pnp)
print("firstpnp",translation_vector)
if debug_process:
im_with_pose = draw_head_pose(cv2_image, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128,-diff_center_x,-diff_center_y)
if first_pnp!="None" and second_refine!="None":
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8) # 反復終了条件
if second_refine == "LM":
rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
else:
rotation_vector, translation_vector = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
if debug_process:
im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128+64,-diff_center_x,-diff_center_y)
result_label = print_euler(rotation_vector,second_refine)
#print("refine",translation_vector)
    if final_iterative:
        # use the current estimate as the initial guess for the iterative solver
        (success, rotation_vector, translation_vector) = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs,
            rotation_vector, translation_vector,
            useExtrinsicGuess=(rotation_vector is not None),
            flags=cv2.SOLVEPNP_ITERATIVE)
        if success:
            result_label = print_euler(rotation_vector, "SOLVEPNP_ITERATIVE")
        else:
            raise gr.Error("final_iterative failed")
    # draw the final estimate
    if rotation_vector is not None:
        im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs, 255, -diff_center_x, -diff_center_y)
    #print("opencv",translation_vector)
if draw_base_model:
if fit_base_model:
im_with_pose=plot_model(im_with_pose,cordinates[0][0],cordinates[0][1],chin_distance)
else:
im_with_pose=plot_model(im_with_pose)
return cv2.cvtColor(im_with_pose,cv2.COLOR_BGR2RGB),result_label,mediapipe_first_text,mediapipe_second_text
css="""
#col-left {
margin: 0 auto;
max-width: 640px;
}
#col-right {
margin: 0 auto;
max-width: 640px;
}
.grid-container {
display: flex;
align-items: center;
justify-content: center;
gap:10px
}
.image {
width: 128px;
height: 128px;
object-fit: cover;
}
.text {
font-size: 16px;
}
"""
with gr.Blocks(css=css, elem_id="demo-container") as demo:
with gr.Column():
gr.HTML(read_file("demo_header.html"))
gr.HTML(read_file("demo_tools.html"))
with gr.Row():
with gr.Column():
image = gr.Image(height=800,sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image")
with gr.Row(elem_id="prompt-container", equal_height=False):
with gr.Row():
btn = gr.Button("Pose Estimate", elem_id="run_button",variant="primary")
with gr.Accordion(label="Advanced Settings", open=True):
#need better landmarker
base_image = gr.Image(sources=['upload','clipboard'],image_mode='RGB',elem_id="image_upload", type="pil", label="Image",visible=False)
with gr.Row( equal_height=True):
camera_fov = gr.Slider(info="not effect mediapipe,nose-chin x multiply",
label="Multiply value",
minimum=0.1,
maximum=2.0,
step=0.01,
value=1.2)
                    double_check_offset_center = gr.Checkbox(label="offset center point", value=True, info="move the face to the image center and detect again (usually more accurate)")
z_multiply = gr.Slider(info="nose depth",
label="Z-Multiply",
minimum=0.1,
maximum=1.5,
step=0.01,
value=0.8)
with gr.Row( equal_height=True):
                    draw_base_model = gr.Checkbox(label="draw base model", value=False, info="draw the base 3D model points")
                    fit_base_model = gr.Checkbox(label="fit base model", value=False, info="visualization only; not used as the PnP model")
first_pnp =gr.Radio(label="PnP",choices=["None","EPNP","SQPNP","IPPE","ITERATIVE","Mediapipe"],value="EPNP")
second_refine =gr.Radio(label="PnP refine",choices=["None","LM","VVS"],value="LM")
with gr.Row( equal_height=True):
                    final_iterative = gr.Checkbox(label="PnP final iterative", value=False, info="sometimes improves the result")
debug_process = gr.Checkbox(label="Debug Process",value=False)
draw_mediapipe_mesh = gr.Checkbox(label="Draw mediapipe mesh",value=False)
draw_mediapipe_result = gr.Checkbox(label="Draw mediapipe result",value=False)
plot_button = gr.Button("Plot Model", elem_id="run_button")
with gr.Column():
result_image = gr.Image(height=760,label="Result", elem_id="output-animation",image_mode='RGB')
            result_text = gr.Textbox(label="OpenCV result")
            mediapipe_first_text = gr.Textbox(label="first MediaPipe result")
            mediapipe_last_text = gr.Textbox(label="second / final MediaPipe result")
    btn.click(fn=process_images, inputs=[image, base_image,
                    camera_fov, double_check_offset_center,
                    draw_base_model, fit_base_model,
                    first_pnp, second_refine, final_iterative, debug_process, draw_mediapipe_mesh, draw_mediapipe_result,
                    z_multiply
                    ], outputs=[result_image, result_text, mediapipe_first_text, mediapipe_last_text], api_name='infer')
plot_button.click(fn=plot_model,inputs=[],outputs=[result_image])
example_images = [
["examples/02316230.jpg"],
["examples/00003245_00.jpg"],
["examples/00827009.jpg"],
["examples/00002062.jpg"],
["examples/00824008.jpg"],
["examples/00825000.jpg"],
["examples/00826007.jpg"],
["examples/00824006.jpg"],
["examples/00828003.jpg"],
["examples/00002200.jpg"],
["examples/00005259.jpg"],
["examples/00018022.jpg"],
["examples/img-above.jpg"],
["examples/00100265.jpg"],
["examples/00039259.jpg"],
]
example1=gr.Examples(
examples = example_images,label="Image",
inputs=[image],examples_per_page=8
)
gr.HTML(read_file("demo_footer.html"))
if __name__ == "__main__":
demo.launch()