DyrusQZ committed
Commit 45a45e6
Parent(s): 57f7e9c

update half-body model

Files changed (1):
    app.py +152 -27
app.py CHANGED
@@ -70,6 +70,132 @@ def get_bbox(mask):
     scale_box = box.scale(1.1, width=width, height=height)
     return scale_box
 
+# def infer_preprocess_image(
+#     rgb_path,
+#     mask,
+#     intr,
+#     pad_ratio,
+#     bg_color,
+#     max_tgt_size,
+#     aspect_standard,
+#     enlarge_ratio,
+#     render_tgt_size,
+#     multiply,
+#     need_mask=True,
+# ):
+#     """inference
+#     image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
+#                                    max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
+#                                    render_tgt_size=source_size, multiply=14, need_mask=True)
+#
+#     """
+#
+#     rgb = np.array(Image.open(rgb_path))
+#     rgb_raw = rgb.copy()
+#
+#     bbox = get_bbox(mask)
+#     bbox_list = bbox.get_box()
+#
+#     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#
+#     h, w, _ = rgb.shape
+#     assert w < h
+#     cur_ratio = h / w
+#     scale_ratio = cur_ratio / aspect_standard
+#
+#     target_w = int(min(w * scale_ratio, h))
+#     offset_w = (target_w - w) // 2
+#     # resize to target ratio.
+#     if offset_w > 0:
+#         rgb = np.pad(
+#             rgb,
+#             ((0, 0), (offset_w, offset_w), (0, 0)),
+#             mode="constant",
+#             constant_values=255,
+#         )
+#         mask = np.pad(
+#             mask,
+#             ((0, 0), (offset_w, offset_w)),
+#             mode="constant",
+#             constant_values=0,
+#         )
+#     else:
+#         offset_w = -offset_w
+#         rgb = rgb[:, offset_w:-offset_w, :]
+#         mask = mask[:, offset_w:-offset_w]
+#
+#     # resize to target ratio.
+#
+#     rgb = np.pad(
+#         rgb,
+#         ((0, 0), (offset_w, offset_w), (0, 0)),
+#         mode="constant",
+#         constant_values=255,
+#     )
+#
+#     mask = np.pad(
+#         mask,
+#         ((0, 0), (offset_w, offset_w)),
+#         mode="constant",
+#         constant_values=0,
+#     )
+#
+#     rgb = rgb / 255.0  # normalize to [0, 1]
+#     mask = mask / 255.0
+#
+#     mask = (mask > 0.5).astype(np.float32)
+#     rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
+#
+#     # resize to the specific size required by the smplx-estimator preprocessor.
+#     rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
+#     mask = resize_image_keepaspect_np(mask, max_tgt_size)
+#
+#     # crop image to enlarge human area.
+#     rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
+#         rgb, mask, aspect_standard, enlarge_ratio
+#     )
+#     if intr is not None:
+#         intr[0, 2] -= offset_x
+#         intr[1, 2] -= offset_y
+#
+#     # resize to render_tgt_size for training
+#
+#     tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
+#         cur_hw=rgb.shape[:2],
+#         aspect_standard=aspect_standard,
+#         tgt_size=render_tgt_size,
+#         multiply=multiply,
+#     )
+#
+#     rgb = cv2.resize(
+#         rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#     mask = cv2.resize(
+#         mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#
+#     if intr is not None:
+#
+#         # ******************** Merge *********************** #
+#         intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
+#         assert (
+#             abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
+#         ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
+#         assert (
+#             abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
+#         ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
+#
+#         # ******************** Merge *********************** #
+#         intr[0, 2] = rgb.shape[1] // 2
+#         intr[1, 2] = rgb.shape[0] // 2
+#
+#     rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+#     mask = (
+#         torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
+#     )  # [1, 1, H, W]
+#     return rgb, mask, intr
+
 def infer_preprocess_image(
     rgb_path,
     mask,
@@ -99,21 +225,24 @@ def infer_preprocess_image(
     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
 
+
     h, w, _ = rgb.shape
     assert w < h
     cur_ratio = h / w
     scale_ratio = cur_ratio / aspect_standard
 
+
     target_w = int(min(w * scale_ratio, h))
-    offset_w = (target_w - w) // 2
-    # resize to target ratio.
-    if offset_w > 0:
+    if target_w - w > 0:
+        offset_w = (target_w - w) // 2
+
         rgb = np.pad(
             rgb,
             ((0, 0), (offset_w, offset_w), (0, 0)),
             mode="constant",
             constant_values=255,
         )
+
         mask = np.pad(
             mask,
             ((0, 0), (offset_w, offset_w)),
@@ -121,25 +250,22 @@
             constant_values=0,
         )
     else:
-        offset_w = -offset_w
-        rgb = rgb[:, offset_w:-offset_w, :]
-        mask = mask[:, offset_w:-offset_w]
+        target_h = w * aspect_standard
+        offset_h = int(target_h - h)
 
-    # resize to target ratio.
-
-    rgb = np.pad(
-        rgb,
-        ((0, 0), (offset_w, offset_w), (0, 0)),
-        mode="constant",
-        constant_values=255,
-    )
+        rgb = np.pad(
+            rgb,
+            ((offset_h, 0), (0, 0), (0, 0)),
+            mode="constant",
+            constant_values=255,
+        )
 
-    mask = np.pad(
-        mask,
-        ((0, 0), (offset_w, offset_w)),
-        mode="constant",
-        constant_values=0,
-    )
+        mask = np.pad(
+            mask,
+            ((offset_h, 0), (0, 0)),
+            mode="constant",
+            constant_values=0,
+        )
 
     rgb = rgb / 255.0  # normalize to [0, 1]
     mask = mask / 255.0
@@ -265,20 +391,19 @@ def launch_pretrained():
     from huggingface_hub import snapshot_download, hf_hub_download
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir="./")
     os.system("tar -xf assets.tar && rm assets.tar")
-    hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
-    os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
+    # hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
+    # os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
     os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar")
+    # replace the full-body weights with the half-body model
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
 
 def launch_env_not_compile_with_cuda():
     os.system("pip install chumpy")
     os.system("pip uninstall -y basicsr")
     os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
     os.system("pip install numpy==1.23.0")
-    # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
-    # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
-    # os.system("pip install git+https://github.com/camenduru/simple-knn/")
-    # os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html")
 
 
 def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
@@ -662,7 +787,7 @@ def demo_lhm(pose_estimator, face_detector, lhm, cfg):
     )
 
     gr.HTML(
-        """<p><h4 style="color: red;"> Note 1: Please provide a full-body image to avoid detection errors. We simplify the pipeline on Spaces: 1) using Rembg instead of SAM2; 2) limiting the output video length to 10 s. For the best visual quality, try the inference code on GitHub instead.</h4></p>"""
+        """<p><h4 style="color: red;"> Note 1: We are glad to announce that both full-body and half-body inputs are now supported! Try testing the robustness with half-body images.</h4></p>"""
     )
     gr.HTML(
         """<p><h4 style="color: green;"> Note 2: We have released ComfyUI nodes for LHM at https://github.com/aigc3d/LHM/tree/feat/comfyui, which support any character and any driving video as input. Try them!</h4></p>"""
    )
 