Spaces: Runtime error
Upload app.py
app.py CHANGED
@@ -39,37 +39,12 @@ from insightface.app import FaceAnalysis
 from insightface.utils import face_align
 
 
-# device = 'cuda:2' if torch.cuda.is_available() else 'cpu'
-
 parser = argparse.ArgumentParser(description='IMAGDressing-v1')
-# parser.add_argument('--if_resampler', type=bool, default=True)
 parser.add_argument('--if_ipa', type=bool, default=True)
 parser.add_argument('--if_control', type=bool, default=True)
 
-# parser.add_argument('--pretrained_model_name_or_path',
-#                     default="./ckpt/Realistic_Vision_V4.0_noVAE",
-#                     type=str)
-# parser.add_argument('--ip_ckpt',
-#                     default="./ckpt/ip-adapter-faceid-plus_sd15.bin",
-#                     type=str)
-# parser.add_argument('--pretrained_image_encoder_path',
-#                     default="./ckpt/image_encoder/",
-#                     type=str)
-# parser.add_argument('--pretrained_vae_model_path',
-#                     default="./ckpt/sd-vae-ft-mse/",
-#                     type=str)
-# parser.add_argument('--model_ckpt',
-#                     default="./ckpt/IMAGDressing-v1_512.pt",
-#                     type=str)
-# parser.add_argument('--output_path', type=str, default="./output_ipa_control_resampler")
-# # parser.add_argument('--device', type=str, default="cuda:0")
 args = parser.parse_args()
 
-# svae path
-# output_path = args.output_path
-#
-# if not os.path.exists(output_path):
-#     os.makedirs(output_path)
 
 
 args.device = "cuda"
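One pitfall this cleanup leaves untouched: argparse's `type=bool` converts any non-empty string to `True`, so passing `--if_ipa False` on the command line would still enable the adapter. A minimal sketch of the usual fix (the `str2bool` helper name is hypothetical, not part of the app):

```python
import argparse

def str2bool(v: str) -> bool:
    # bool("False") is True, so parse the string explicitly.
    if v.lower() in ("yes", "true", "t", "1"):
        return True
    if v.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {v!r}")

parser = argparse.ArgumentParser(description='IMAGDressing-v1')
parser.add_argument('--if_ipa', type=str2bool, default=True)
parser.add_argument('--if_control', type=str2bool, default=True)
```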
@@ -80,8 +55,6 @@ text_encoder = CLIPTextModel.from_pretrained("SG161222/Realistic_Vision_V4.0_noV
 image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder").to(dtype=torch.float16, device=args.device)
 unet = UNet2DConditionModel.from_pretrained("SG161222/Realistic_Vision_V4.0_noVAE", subfolder="unet").to(dtype=torch.float16,device=args.device)
 
-image_face_fusion = pipeline('face_fusion_torch', model='damo/cv_unet_face_fusion_torch', model_revision='v1.0.3')
-
 #face_model
 app = FaceAnalysis(model_path="buffalo_l", providers=[('CUDAExecutionProvider', {"device_id": args.device})])  ## use GPU 0; the default buffalo_l model is sufficient
 app.prepare(ctx_id=0, det_size=(640, 640))
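This removal is the substantive change of the commit: the modelscope face-fusion pipeline is no longer built at import time but inside the `@spaces.GPU`-decorated `dress_process` (see the `@@ -279,11 +235,12 @@` hunk below), presumably because ZeroGPU Spaces only attach a GPU inside decorated calls, and touching CUDA at startup may be what produced the Space's "Runtime error" status. One side effect is that the pipeline is now rebuilt on every post-processed request; a minimal caching sketch, assuming a hypothetical `get_face_fusion` helper:

```python
from functools import lru_cache
from modelscope.pipelines import pipeline

@lru_cache(maxsize=1)
def get_face_fusion():
    # Built once, on first use inside a GPU-attached call, then reused.
    return pipeline('face_fusion_torch',
                    model='damo/cv_unet_face_fusion_torch',
                    model_revision='v1.0.3')
```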
@@ -112,7 +85,7 @@ for name in unet.attn_processors.keys():
     elif name.startswith("down_blocks"):
         block_id = int(name[len("down_blocks.")])
         hidden_size = unet.config.block_out_channels[block_id]
-
+
     if cross_attention_dim is None:
         attn_procs[name] = RefLoraSAttnProcessor2_0(name, hidden_size)
     else:
@@ -161,18 +134,10 @@ noise_scheduler = DDIMScheduler(
     set_alpha_to_one=False,
     steps_offset=1,
 )
-# noise_scheduler = UniPCMultistepScheduler.from_config(args.pretrained_model_name_or_path, subfolder="scheduler")
 
 control_net_openpose = ControlNetModel.from_pretrained(
     "lllyasviel/control_v11p_sd15_openpose",
     torch_dtype=torch.float16).to(device=args.device)
-# pipe = PipIpaControlNet(unet=unet, reference_unet=ref_unet, vae=vae, tokenizer=tokenizer,
-#                         text_encoder=text_encoder, image_encoder=image_encoder,
-#                         ip_ckpt=args.ip_ckpt,
-#                         ImgProj=image_proj, controlnet=control_net_openpose,
-#                         scheduler=noise_scheduler,
-#                         safety_checker=StableDiffusionSafetyChecker,
-#                         feature_extractor=CLIPImageProcessor)
 
 img_transform = transforms.Compose([
     transforms.Resize([640, 512], interpolation=transforms.InterpolationMode.BILINEAR),
@@ -197,33 +162,27 @@ def resize_img(input_image, max_side=640, min_side=512, size=None,
 @spaces.GPU
 def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, caption_guidance_scale,
                   face_guidance_scale,self_guidance_scale, cross_guidance_scale,if_ipa, if_post, if_control, denoise_steps, seed=42):
-
+
     if prompt is None:
         prompt = "a photography of a model"
     prompt = prompt + ', best quality, high quality'
     print(prompt, cloth_guidance_scale, if_ipa, if_control, denoise_steps, seed)
     clip_image_processor = CLIPImageProcessor()
-
+
     if not garm_img:
         raise gr.Error("请上传衣服 / Please upload garment")
     clothes_img = resize_img(garm_img)
     vae_clothes = img_transform(clothes_img).unsqueeze(0)
-    # print(vae_clothes.shape)
     ref_clip_image = clip_image_processor(images=clothes_img, return_tensors="pt").pixel_values
 
     if if_ipa:
-        # image = cv2.imread(face_img)
         faces = app.get(face_img)
-
         if not faces:
             raise gr.Error("人脸检测异常,尝试其他肖像 / Abnormal face detection. Try another portrait")
         faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
         face_image = face_align.norm_crop(face_img, landmark=faces[0].kps, image_size=224)  # you can also segment the face
 
-
-        # face_img = Image.fromarray(face_image.astype('uint8'))
-        # face_img.save('face.png')
-
+
         face_clip_image = clip_image_processor(images=face_image, return_tensors="pt").pixel_values
     else:
         faceid_embeds = None
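Context for the `if_ipa` branch above: IP-Adapter FaceID-Plus conditions on two complementary face signals, the ArcFace identity embedding (`faces[0].normed_embedding`) and CLIP features of the 224×224 aligned crop. A shape sketch reusing the file's `app` and `face_align` objects, under the assumption that `face_img` arrives as a numpy array (the Gradio input type is not visible in this diff):

```python
import numpy as np
import torch

face_img = np.zeros((512, 512, 3), dtype=np.uint8)  # stand-in portrait array
faces = app.get(face_img)                           # insightface detection + embedding
if faces:
    # ArcFace identity embedding, shape (1, 512)
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    # Aligned 224x224x3 uint8 crop, later encoded by the CLIP image processor
    face_image = face_align.norm_crop(face_img, landmark=faces[0].kps, image_size=224)
```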
@@ -235,9 +194,6 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
         pose_image = diffusers.utils.load_image(pose_img)
     else:
         pose_image = None
-    # print(if_ipa, if_control)
-    # pipe, generator = prepare_pipeline(args, if_ipa, if_control, unet, ref_unet, vae, tokenizer, text_encoder,
-    #                                    image_encoder, image_proj, control_net_openpose)
 
     noise_scheduler = DDIMScheduler(
         num_train_timesteps=1000,
@@ -248,7 +204,7 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
         set_alpha_to_one=False,
         steps_offset=1,
     )
-
+
     pipe = PipIpaControlNet(unet=unet, reference_unet=ref_unet, vae=vae, tokenizer=tokenizer,
                             text_encoder=text_encoder, image_encoder=image_encoder,
                             ip_ckpt='./ckpt/ip-adapter-faceid-plus_sd15.bin',
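Visible across this and the previous hunk: the DDIMScheduler and the PipIpaControlNet wrapper are re-assembled on every call to `dress_process`. The scheduler holds no learned weights, so that part is cheap; if the duplicated hyperparameters bother you, the per-call copy can be cloned from the module-level instance instead. A sketch, not part of the commit, setting only the parameters visible in this diff:

```python
from diffusers import DDIMScheduler

# Stand-in for the module-level scheduler; parameters elided by the hunk
# boundaries keep the diffusers defaults here.
base_scheduler = DDIMScheduler(num_train_timesteps=1000,
                               set_alpha_to_one=False,
                               steps_offset=1)
# Per-call clone that stays in sync with the module-level settings.
local_scheduler = DDIMScheduler.from_config(base_scheduler.config)
```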
@@ -279,11 +235,12 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
     ).images
 
     if if_post and if_ipa:
-
+        image_face_fusion = pipeline('face_fusion_torch', model='damo/cv_unet_face_fusion_torch',
+                                     model_revision='v1.0.3')
         output_array = np.array(output[0])
-
+
         bgr_array = cv2.cvtColor(output_array, cv2.COLOR_RGB2BGR)
-
+
         bgr_image = Image.fromarray(bgr_array)
         result = image_face_fusion(dict(template=bgr_image, user=Image.fromarray(face_image.astype('uint8'))))
         return result[OutputKeys.OUTPUT_IMG]
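A note on the channel handling above: PIL images are RGB, while the modelscope face-fusion model follows OpenCV's BGR convention, so the generated image's channels are swapped before fusion. A minimal sketch of that round trip (`pil_output` is a hypothetical stand-in for `output[0]`):

```python
import cv2
import numpy as np
from PIL import Image

pil_output = Image.new("RGB", (512, 640))            # stand-in generated image
rgb_array = np.array(pil_output)                     # PIL -> numpy, RGB channel order
bgr_array = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
bgr_image = Image.fromarray(bgr_array)               # PIL container holding BGR data
```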
@@ -349,11 +306,8 @@ with image_blocks as demo:
                 outputs=pose_img,
                 examples=pose_list_path)
 
-
-            # # image_out = gr.Image(label="Output", elem_id="output-img", height=400)
-            # masked_img = gr.Image(label="Masked image output", elem_id="masked-img", show_share_button=False)
+
         with gr.Column():
-            # image_out = gr.Image(label="Output", elem_id="output-img", height=400)
             image_out = gr.Image(label="Output", elem_id="output-img", show_share_button=False)
             # Add usage tips below the output image
             gr.Markdown("""
@@ -367,19 +321,17 @@ with image_blocks as demo:
             """)
         with gr.Column():
             try_button = gr.Button(value="Dressing")
-            with gr.Accordion(label="Advanced Settings", open=
+            with gr.Accordion(label="Advanced Settings", open=True):
                 with gr.Row(elem_id="prompt-container"):
                     with gr.Row():
                         prompt = gr.Textbox(placeholder="Description of prompt ex) A beautiful woman dress Short Sleeve Round Neck T-shirts",value='A beautiful woman',
                                             show_label=False, elem_id="prompt")
-
-                # neg_prompt = gr.Textbox(placeholder="Description of neg prompt ex) Short Sleeve Round Neck T-shirts",
-                #                         show_label=False, elem_id="neg_prompt")
+
                 with gr.Row():
-                    cloth_guidance_scale = gr.Slider(label="Cloth guidance Scale", minimum=0.0, maximum=1.0, value=0.
+                    cloth_guidance_scale = gr.Slider(label="Cloth guidance Scale", minimum=0.0, maximum=1.0, value=0.85, step=0.1,
                                                      visible=True)
                 with gr.Row():
-                    caption_guidance_scale = gr.Slider(label="Prompt Guidance Scale", minimum=1, maximum=10., value=
+                    caption_guidance_scale = gr.Slider(label="Prompt Guidance Scale", minimum=1, maximum=10., value=6.5, step=0.1,
                                                        visible=True)
                 with gr.Row():
                     face_guidance_scale = gr.Slider(label="Face Guidance Scale", minimum=0.0, maximum=2.0, value=0.9, step=0.1,