Spaces:

3DAIGC
/

LHM

Running on Zero

App Files Files Community

DyrusQZ committed on Mar 18

Commit

f5e714b

1 Parent(s): b206b0b

move detail models in app

Browse files

Files changed (1) hide show

app.py +319 -5

app.py CHANGED Viewed

@@ -25,6 +25,138 @@ import os
 from engine.pose_estimation.pose_estimator import PoseEstimator
 from LHM.utils.face_detector import VGGHeadDetector
 from LHM.utils.hf_hub import wrap_model_hub
 def parse_configs():
@@ -193,11 +325,192 @@ def demo_lhm(pose_estimator, face_detector, lhm_model, cfg):
         motion_img_need_mask = cfg.get("motion_img_need_mask", False)  # False
         vis_motion = cfg.get("vis_motion", False)  # False
-        parsing_mask = parsing(image_raw)
-        input = cv2.imread(img_path)
-        output = remove(input)
-        alpha = output[:,:,3]
         # self.infer_single(
         #     image_path,
@@ -221,6 +534,7 @@ def demo_lhm(pose_estimator, face_detector, lhm_model, cfg):
         #     gradio_video_save_path=dump_video_path
         # ))
         # if status:
         #     return dump_image_path, dump_video_path
         # else:

 from engine.pose_estimation.pose_estimator import PoseEstimator
 from LHM.utils.face_detector import VGGHeadDetector
 from LHM.utils.hf_hub import wrap_model_hub
+from LHM.runners.infer.utils import (
+    calc_new_tgt_size_by_aspect,
+    center_crop_according_to_mask,
+    prepare_motion_seqs,
+    resize_image_keepaspect_np,
+)
def infer_preprocess_image(
    rgb_path,
    mask,
    intr,
    pad_ratio,
    bg_color,
    max_tgt_size,
    aspect_standard,
    enlarge_ratio,
    render_tgt_size,
    multiply,
    need_mask=True,
):
    """Prepare a reference image and its foreground mask for inference.

    The image is cropped to the bounding box of ``mask``, padded or cropped
    horizontally toward ``aspect_standard``, composited over ``bg_color``,
    resized, cropped around the subject and finally resized to the size
    computed from ``render_tgt_size`` and ``multiply``.

    Example call::

        image, _, _ = infer_preprocess_image(
            image_path, mask=mask, intr=None, pad_ratio=0, bg_color=1.0,
            max_tgt_size=896, aspect_standard=aspect_standard,
            enlarge_ratio=[1.0, 1.0], render_tgt_size=source_size,
            multiply=14, need_mask=True)

    Args:
        rgb_path: Path of the image; it is opened with ``Image.open``.
        mask: 2-D array aligned with the image. It is divided by 255 and
            thresholded at 0.5, so values above 127.5 count as foreground.
        intr: Optional intrinsics matrix whose principal point is read from
            ``intr[0, 2]`` / ``intr[1, 2]``. It is modified in place.
            Pass ``None`` to skip all intrinsics handling.
        pad_ratio: Unused; kept so existing callers keep working.
        bg_color: Value blended in wherever the mask is 0 (the image is in
            ``[0, 1]`` at that point).
        max_tgt_size: Forwarded to ``resize_image_keepaspect_np``.
        aspect_standard: Target height / width ratio.
        enlarge_ratio: Forwarded to ``center_crop_according_to_mask``.
        render_tgt_size: Forwarded to ``calc_new_tgt_size_by_aspect`` as
            ``tgt_size``.
        multiply: Forwarded to ``calc_new_tgt_size_by_aspect``.
        need_mask: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(rgb, mask, intr)``: ``rgb`` is a float tensor shaped
        ``[1, 3, H, W]``, ``mask`` a float tensor shaped ``[1, 1, H, W]`` and
        ``intr`` the updated intrinsics (or ``None``).

    Raises:
        AssertionError: If the bounding-box crop is not taller than it is
            wide, or if the scaled principal point is more than 2.5 pixels
            away from the image centre.
    """
    rgb = np.array(Image.open(rgb_path))

    # Restrict image and mask to the bounding box of the foreground mask.
    bbox = get_bbox(mask)
    bbox_list = bbox.get_box()
    rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
    mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]

    h, w, _ = rgb.shape
    assert w < h
    cur_ratio = h / w
    scale_ratio = cur_ratio / aspect_standard
    target_w = int(min(w * scale_ratio, h))
    offset_w = (target_w - w) // 2

    # Move the width toward the target aspect ratio.
    if offset_w > 0:
        rgb = np.pad(
            rgb,
            ((0, 0), (offset_w, offset_w), (0, 0)),
            mode="constant",
            constant_values=255,
        )
        mask = np.pad(
            mask,
            ((0, 0), (offset_w, offset_w)),
            mode="constant",
            constant_values=0,
        )
    elif offset_w < 0:
        offset_w = -offset_w
        rgb = rgb[:, offset_w:-offset_w, :]
        mask = mask[:, offset_w:-offset_w]
    # offset_w == 0 needs no adjustment. Slicing with ``0:-0`` (what an
    # unconditional crop would do here) selects zero columns and leaves an
    # empty image, so that case must be skipped explicitly.

    # NOTE(review): this second pad repeats the adjustment above with the same
    # widths and looks like a leftover duplicate. It is kept unchanged because
    # removing it alters the geometry fed to the model -- confirm before
    # dropping it.
    rgb = np.pad(
        rgb,
        ((0, 0), (offset_w, offset_w), (0, 0)),
        mode="constant",
        constant_values=255,
    )
    mask = np.pad(
        mask,
        ((0, 0), (offset_w, offset_w)),
        mode="constant",
        constant_values=0,
    )

    rgb = rgb / 255.0  # normalize to [0, 1]
    mask = mask / 255.0
    mask = (mask > 0.5).astype(np.float32)
    # Keep the first three channels and blend the background colour in
    # wherever the binary mask is 0.
    rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])

    # Resize to the size required by the preprocessor of the smplx-estimator.
    rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
    mask = resize_image_keepaspect_np(mask, max_tgt_size)

    # Crop the image to enlarge the human area.
    rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
        rgb, mask, aspect_standard, enlarge_ratio
    )
    if intr is not None:
        # Shift the principal point by the crop offsets.
        intr[0, 2] -= offset_x
        intr[1, 2] -= offset_y

    # Resize to the render target size.
    tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
        cur_hw=rgb.shape[:2],
        aspect_standard=aspect_standard,
        tgt_size=render_tgt_size,
        multiply=multiply,
    )
    # cv2 expects dsize as (width, height).
    rgb = cv2.resize(
        rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
    )
    mask = cv2.resize(
        mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
    )

    if intr is not None:
        intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
        # The principal point must sit within 2.5 px of the image centre ...
        assert (
            abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
        ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
        assert (
            abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
        ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
        # ... and is then snapped onto it.
        intr[0, 2] = rgb.shape[1] // 2
        intr[1, 2] = rgb.shape[0] // 2

    rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
    mask = (
        torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
    )  # [1, 1, H, W]
    return rgb, mask, intr
 def parse_configs():
         motion_img_need_mask = cfg.get("motion_img_need_mask", False)  # False
         vis_motion = cfg.get("vis_motion", False)  # False
+        input_np = cv2.imread(image_raw)
+        output_np = remove(input_np)
+        parsing_mask = output_np[:,:,3]
+        # prepare reference image
+        image, _, _ = infer_preprocess_image(
+            image_raw,
+            mask=parsing_mask,
+            intr=None,
+            pad_ratio=0,
+            bg_color=1.0,
+            max_tgt_size=896,
+            aspect_standard=aspect_standard,
+            enlarge_ratio=[1.0, 1.0],
+            render_tgt_size=source_size,
+            multiply=14,
+            need_mask=True,
+        )
+        try:
+            rgb = np.array(Image.open(image_path))
+            rgb = torch.from_numpy(rgb).permute(2, 0, 1)
+            bbox = face_detector.detect_face(rgb)
+            head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
+            head_rgb = head_rgb.permute(1, 2, 0)
+            src_head_rgb = head_rgb.cpu().numpy()
+        except:
+            print("w/o head input!")
+            src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
+        # resize to dino size
+        try:
+            src_head_rgb = cv2.resize(
+                src_head_rgb,
+                dsize=(cfg.src_head_size, cfg.src_head_size),
+                interpolation=cv2.INTER_AREA,
+            )  # resize to dino size
+        except:
+            src_head_rgb = np.zeros(
+                (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
+            )
+        src_head_rgb = (
+            torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
+        )  # [1, 3, H, W]
+        save_ref_img_path = os.path.join(
+            dump_tmp_dir, "output.png"
+        )
+        vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
+            np.uint8
+        )
+        Image.fromarray(vis_ref_img).save(save_ref_img_path)
+        # read motion seq
+        motion_name = os.path.dirname(
+            motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
+        )
+        motion_name = os.path.basename(motion_name)
+        motion_seq = prepare_motion_seqs(
+            motion_seqs_dir,
+            None,
+            save_root=dump_tmp_dir,
+            fps=30,
+            bg_color=1.0,
+            aspect_standard=aspect_standard,
+            enlarge_ratio=[1.0, 1, 0],
+            render_image_res=render_size,
+            multiply=16,
+            need_mask=motion_img_need_mask,
+            vis_motion=vis_motion,
+        )
+        camera_size = len(motion_seq["motion_seqs"])
+        shape_param = shape_pose.beta
+        device = "cuda"
+        dtype = torch.float32
+        shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
+        lhm.to(dtype)
+        smplx_params = motion_seq['smplx_params']
+        smplx_params['betas'] = shape_param.to(device)
+        gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
+            image.unsqueeze(0).to(device, dtype),
+            src_head_rgb.unsqueeze(0).to(device, dtype),
+            None,
+            None,
+            render_c2ws=motion_seq["render_c2ws"].to(device),
+            render_intrs=motion_seq["render_intrs"].to(device),
+            render_bg_colors=motion_seq["render_bg_colors"].to(device),
+            smplx_params={
+                k: v.to(device) for k, v in smplx_params.items()
+            },
+        )
+        # rendering !!!!
+        start_time = time.time()
+        batch_dict = dict()
+        batch_size = 40  # avoid memeory out!
+        for batch_i in range(0, camera_size, batch_size):
+            with torch.no_grad():
+                # TODO check device and dtype
+                # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
+                keys = [
+                    "root_pose",
+                    "body_pose",
+                    "jaw_pose",
+                    "leye_pose",
+                    "reye_pose",
+                    "lhand_pose",
+                    "rhand_pose",
+                    "trans",
+                    "focal",
+                    "princpt",
+                    "img_size_wh",
+                    "expr",
+                ]
+                batch_smplx_params = dict()
+                batch_smplx_params["betas"] = shape_param.to(device)
+                batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
+                for key in keys:
+                    batch_smplx_params[key] = motion_seq["smplx_params"][key][
+                        :, batch_i : batch_i + batch_size
+                    ].to(device)
+                res = self.model.animation_infer(gs_model_list, query_points, batch_smplx_params,
+                    render_c2ws=motion_seq["render_c2ws"][
+                        :, batch_i : batch_i + batch_size
+                    ].to(device),
+                    render_intrs=motion_seq["render_intrs"][
+                        :, batch_i : batch_i + batch_size
+                    ].to(device),
+                    render_bg_colors=motion_seq["render_bg_colors"][
+                        :, batch_i : batch_i + batch_size
+                    ].to(device),
+                )
+            for accumulate_key in ["comp_rgb", "comp_mask"]:
+                if accumulate_key not in batch_dict:
+                    batch_dict[accumulate_key] = []
+                batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
+            del res
+            torch.cuda.empty_cache()
+        for accumulate_key in ["comp_rgb", "comp_mask"]:
+            batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
+        print(f"time elapsed: {time.time() - start_time}")
+        rgb = batch_dict["comp_rgb"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
+        mask = batch_dict["comp_mask"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
+        mask[mask < 0.5] = 0.0
+        rgb = rgb * mask + (1 - mask) * 1
+        rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
+        if vis_motion:
+            # print(rgb.shape, motion_seq["vis_motion_render"].shape)
+            vis_ref_img = np.tile(
+                cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
+                    None, :, :, :
+                ],
+                (rgb.shape[0], 1, 1, 1),
+            )
+            rgb = np.concatenate(
+                [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
+            )
+        os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
+        images_to_video(
+            rgb,
+            output_path=dump_video_path,
+            fps=render_fps,
+            gradio_codec=False,
+            verbose=True,
+        )
         # self.infer_single(
         #     image_path,
         #     gradio_video_save_path=dump_video_path
         # ))
+        return dump_image_path, dump_video_path
         # if status:
         #     return dump_image_path, dump_video_path
         # else: