Spaces:

3DAIGC
/

LHM

Running on Zero

App Files Community

DyrusQZ committed on Mar 19

Commit

04c1907

1 Parent(s): 7457a7b

to fix gs error

Browse files

Files changed (5) hide show

LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc +0 -0
LHM/models/rendering/gs_renderer.py +16 -14
app.py +760 -760
wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl +2 -2
wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl +2 -2

LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc CHANGED Viewed

Binary files a/LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc and b/LHM/models/rendering/__pycache__/gs_renderer.cpython-310.pyc differ

LHM/models/rendering/gs_renderer.py CHANGED Viewed

@@ -818,7 +818,7 @@ class GS3DRenderer(nn.Module):
     def hyper_step(self, step):
         self.gs_net.hyper_step(step)
-    @torch.no_grad()
     def forward_single_view(
         self,
         gs: GaussianModel,
@@ -829,14 +829,14 @@ class GS3DRenderer(nn.Module):
         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
         screenspace_points = (
             torch.zeros_like(
-                gs.xyz, dtype=gs.xyz.dtype, requires_grad=True, device=self.device
             )
             + 0
         )
-        try:
-            screenspace_points.retain_grad()
-        except:
-            pass
         bg_color = background_color
         # Set up rasterization configuration
@@ -877,23 +877,25 @@ class GS3DRenderer(nn.Module):
         shs = None
         colors_precomp = None
         if self.gs_net.use_rgb:
-            colors_precomp = gs.shs.squeeze(1).float()
             shs = None
         else:
             colors_precomp = None
-            shs = gs.shs.float()
         # Rasterize visible Gaussians to image, obtain their radii (on screen).
         # NOTE that dadong tries to regress rgb not shs
         # with torch.autocast(device_type=self.device.type, dtype=torch.float32):
         rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
-            means3D=means3D.float(),
-            means2D=means2D.float(),
             shs=shs,
             colors_precomp=colors_precomp,
-            opacities=opacity.float(),
-            scales=scales.float(),
-            rotations=rotations.float(),
             cov3D_precomp=cov3D_precomp,
         )
@@ -1322,7 +1324,7 @@ class GS3DRenderer(nn.Module):
             gs_attr_list.append(gs_attr)
         return gs_attr_list, query_points, smplx_data
-    @torch.no_grad()
     def forward_animate_gs(
         self,
         gs_attr_list,

     def hyper_step(self, step):
         self.gs_net.hyper_step(step)
     def forward_single_view(
         self,
         gs: GaussianModel,
         # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
         screenspace_points = (
             torch.zeros_like(
+                gs.xyz, dtype=gs.xyz.dtype, requires_grad=False, device=self.device
             )
             + 0
         )
+        # try:
+        #     screenspace_points.retain_grad()
+        # except:
+        #     pass
         bg_color = background_color
         # Set up rasterization configuration
         shs = None
         colors_precomp = None
         if self.gs_net.use_rgb:
+            colors_precomp = gs.shs.squeeze(1)
             shs = None
         else:
             colors_precomp = None
+            shs = gs.shs
         # Rasterize visible Gaussians to image, obtain their radii (on screen).
         # NOTE that dadong tries to regress rgb not shs
         # with torch.autocast(device_type=self.device.type, dtype=torch.float32):
+        print(means3D.device, means2D.device, colors_precomp.device, opacity.device, rotations.device, self.device)
+        print(means3D.dtype, means2D.dtype, colors_precomp.dtype)
         rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
+            means3D=means3D,
+            means2D=means2D,
             shs=shs,
             colors_precomp=colors_precomp,
+            opacities=opacity,
+            scales=scales,
+            rotations=rotations,
             cov3D_precomp=cov3D_precomp,
         )
             gs_attr_list.append(gs_attr)
         return gs_attr_list, query_points, smplx_data
     def forward_animate_gs(
         self,
         gs_attr_list,

app.py CHANGED Viewed

@@ -13,772 +13,772 @@
 # limitations under the License.
-import os
-os.system("rm -rf /data-nvme/zerogpu-offload/")
-import cv2
-import time
-from PIL import Image
-import numpy as np
-import gradio as gr
-import base64
-import spaces
-import torch
-torch._dynamo.config.disable = True
-import subprocess
-import os
-import argparse
-from omegaconf import OmegaConf
-from rembg import remove
-from engine.pose_estimation.pose_estimator import PoseEstimator
-from LHM.utils.face_detector import VGGHeadDetector
-from LHM.utils.hf_hub import wrap_model_hub
-from LHM.runners.infer.utils import (
-    calc_new_tgt_size_by_aspect,
-    center_crop_according_to_mask,
-    prepare_motion_seqs,
-    resize_image_keepaspect_np,
-)
-from engine.SegmentAPI.base import Bbox
-def get_bbox(mask):
-    height, width = mask.shape
-    pha = mask / 255.0
-    pha[pha < 0.5] = 0.0
-    pha[pha >= 0.5] = 1.0
-    # obtain bbox
-    _h, _w = np.where(pha == 1)
-    whwh = [
-        _w.min().item(),
-        _h.min().item(),
-        _w.max().item(),
-        _h.max().item(),
-    ]
-    box = Bbox(whwh)
-    # scale box to 1.05
-    scale_box = box.scale(1.1, width=width, height=height)
-    return scale_box
-def infer_preprocess_image(
-    rgb_path,
-    mask,
-    intr,
-    pad_ratio,
-    bg_color,
-    max_tgt_size,
-    aspect_standard,
-    enlarge_ratio,
-    render_tgt_size,
-    multiply,
-    need_mask=True,
-):
-    """inferece
-    image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
-                                        max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
-                                        render_tgt_size=source_size, multiply=14, need_mask=True)
-    """
-    rgb = np.array(Image.open(rgb_path))
-    rgb_raw = rgb.copy()
-    bbox = get_bbox(mask)
-    bbox_list = bbox.get_box()
-    rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
-    mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
-    h, w, _ = rgb.shape
-    assert w < h
-    cur_ratio = h / w
-    scale_ratio = cur_ratio / aspect_standard
-    target_w = int(min(w * scale_ratio, h))
-    offset_w = (target_w - w) // 2
-    # resize to target ratio.
-    if offset_w > 0:
-        rgb = np.pad(
-            rgb,
-            ((0, 0), (offset_w, offset_w), (0, 0)),
-            mode="constant",
-            constant_values=255,
-        )
-        mask = np.pad(
-            mask,
-            ((0, 0), (offset_w, offset_w)),
-            mode="constant",
-            constant_values=0,
-        )
-    else:
-        offset_w = -offset_w
-        rgb = rgb[:,offset_w:-offset_w,:]
-        mask = mask[:,offset_w:-offset_w]
-    # resize to target ratio.
-    rgb = np.pad(
-        rgb,
-        ((0, 0), (offset_w, offset_w), (0, 0)),
-        mode="constant",
-        constant_values=255,
-    )
-    mask = np.pad(
-        mask,
-        ((0, 0), (offset_w, offset_w)),
-        mode="constant",
-        constant_values=0,
-    )
-    rgb = rgb / 255.0  # normalize to [0, 1]
-    mask = mask / 255.0
-    mask = (mask > 0.5).astype(np.float32)
-    rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
-    # resize to specific size require by preprocessor of smplx-estimator.
-    rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
-    mask = resize_image_keepaspect_np(mask, max_tgt_size)
-    # crop image to enlarge human area.
-    rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
-        rgb, mask, aspect_standard, enlarge_ratio
-    )
-    if intr is not None:
-        intr[0, 2] -= offset_x
-        intr[1, 2] -= offset_y
-    # resize to render_tgt_size for training
-    tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
-        cur_hw=rgb.shape[:2],
-        aspect_standard=aspect_standard,
-        tgt_size=render_tgt_size,
-        multiply=multiply,
-    )
-    rgb = cv2.resize(
-        rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
-    )
-    mask = cv2.resize(
-        mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
-    )
-    if intr is not None:
-        # ******************** Merge *********************** #
-        intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
-        assert (
-            abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
-        ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
-        assert (
-            abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
-        ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
-        # ******************** Merge *********************** #
-        intr[0, 2] = rgb.shape[1] // 2
-        intr[1, 2] = rgb.shape[0] // 2
-    rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
-    mask = (
-        torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
-    )  # [1, 1, H, W]
-    return rgb, mask, intr
-def parse_configs():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--config", type=str)
-    parser.add_argument("--infer", type=str)
-    args, unknown = parser.parse_known_args()
-    cfg = OmegaConf.create()
-    cli_cfg = OmegaConf.from_cli(unknown)
-    # parse from ENV
-    if os.environ.get("APP_INFER") is not None:
-        args.infer = os.environ.get("APP_INFER")
-    if os.environ.get("APP_MODEL_NAME") is not None:
-        cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
-    args.config = args.infer if args.config is None else args.config
-    if args.config is not None:
-        cfg_train = OmegaConf.load(args.config)
-        cfg.source_size = cfg_train.dataset.source_image_res
-        try:
-            cfg.src_head_size = cfg_train.dataset.src_head_size
-        except:
-            cfg.src_head_size = 112
-        cfg.render_size = cfg_train.dataset.render_image.high
-        _relative_path = os.path.join(
-            cfg_train.experiment.parent,
-            cfg_train.experiment.child,
-            os.path.basename(cli_cfg.model_name).split("_")[-1],
-        )
-        cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
-        cfg.image_dump = os.path.join("exps", "images", _relative_path)
-        cfg.video_dump = os.path.join("exps", "videos", _relative_path)  # output path
-    if args.infer is not None:
-        cfg_infer = OmegaConf.load(args.infer)
-        cfg.merge_with(cfg_infer)
-        cfg.setdefault(
-            "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
-        )
-        cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
-        cfg.setdefault(
-            "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
-        )
-        cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
-    cfg.motion_video_read_fps = 6
-    cfg.merge_with(cli_cfg)
-    cfg.setdefault("logger", "INFO")
-    assert cfg.model_name is not None, "model_name is required"
-    return cfg, cfg_train
-def _build_model(cfg):
-    from LHM.models import model_dict
-    hf_model_cls = wrap_model_hub(model_dict["human_lrm_sapdino_bh_sd3_5"])
-    model = hf_model_cls.from_pretrained(cfg.model_name)
-    return model
-def launch_pretrained():
-    from huggingface_hub import snapshot_download, hf_hub_download
-    hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='assets.tar', local_dir="./")
-    os.system("tar -xvf assets.tar && rm assets.tar")
-    hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
-    os.system("tar -xvf LHM-0.5B.tar && rm LHM-0.5B.tar")
-    hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
-    os.system("tar -xvf LHM_prior_model.tar && rm LHM_prior_model.tar")
-def launch_env_not_compile_with_cuda():
-    os.system("pip install chumpy")
-    os.system("pip uninstall -y basicsr")
-    os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
-    # os.system("pip install -e ./third_party/sam2")
-    os.system("pip install numpy==1.23.0")
-    # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
-    # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
-    # os.system("pip install git+https://github.com/camenduru/simple-knn/")
-    os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html")
-def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
-    '''Inference code avoid repeat forward.
-    '''
-    render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int(
-        render_intrs[0, 0, 0, 2] * 2
-    )
-    # render target views
-    render_res_list = []
-    num_views = render_c2ws.shape[1]
-    start_time = time.time()
-    # render target views
-    render_res_list = []
-    for view_idx in range(num_views):
-        render_res = renderer.forward_animate_gs(
-            gs_model_list,
-            query_points,
-            renderer.get_single_view_smpl_data(smplx_params, view_idx),
-            render_c2ws[:, view_idx : view_idx + 1],
-            render_intrs[:, view_idx : view_idx + 1],
-            render_h,
-            render_w,
-            render_bg_colors[:, view_idx : view_idx + 1],
-        )
-        render_res_list.append(render_res)
-    print(
-        f"time elpased(animate gs model per frame):{(time.time() -  start_time)/num_views}"
-    )
-    out = defaultdict(list)
-    for res in render_res_list:
-        for k, v in res.items():
-            if isinstance(v[0], torch.Tensor):
-                out[k].append(v.detach().cpu())
-            else:
-                out[k].append(v)
-    for k, v in out.items():
-        # print(f"out key:{k}")
-        if isinstance(v[0], torch.Tensor):
-            out[k] = torch.concat(v, dim=1)
-            if k in ["comp_rgb", "comp_mask", "comp_depth"]:
-                out[k] = out[k][0].permute(
-                    0, 2, 3, 1
-                )  # [1, Nv, 3, H, W] -> [Nv, 3, H, W] - > [Nv, H, W, 3]
-        else:
-            out[k] = v
-    return out
-def assert_input_image(input_image):
-    if input_image is None:
-        raise gr.Error("No image selected or uploaded!")
-def prepare_working_dir():
-    import tempfile
-    working_dir = tempfile.TemporaryDirectory()
-    return working_dir
-def init_preprocessor():
-    from LHM.utils.preprocess import Preprocessor
-    global preprocessor
-    preprocessor = Preprocessor()
-def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool, working_dir):
-    image_raw = os.path.join(working_dir.name, "raw.png")
-    with Image.fromarray(image_in) as img:
-        img.save(image_raw)
-    image_out = os.path.join(working_dir.name, "rembg.png")
-    success = preprocessor.preprocess(image_path=image_raw, save_path=image_out, rmbg=remove_bg, recenter=recenter)
-    assert success, f"Failed under preprocess_fn!"
-    return image_out
-def get_image_base64(path):
-    with open(path, "rb") as image_file:
-        encoded_string = base64.b64encode(image_file.read()).decode()
-    return f"data:image/png;base64,{encoded_string}"
-def demo_lhm(pose_estimator, face_detector, lhm, cfg):
-    @spaces.GPU
-    def core_fn(image: str, video_params, working_dir):
-        image_raw = os.path.join(working_dir.name, "raw.png")
-        with Image.fromarray(image) as img:
-            img.save(image_raw)
-        base_vid = os.path.basename(video_params).split("_")[0]
-        smplx_params_dir = os.path.join("./assets/sample_motion", base_vid, "smplx_params")
-        dump_video_path = os.path.join(working_dir.name, "output.mp4")
-        dump_image_path = os.path.join(working_dir.name, "output.png")
-        # prepare dump paths
-        omit_prefix = os.path.dirname(image_raw)
-        image_name = os.path.basename(image_raw)
-        uid = image_name.split(".")[0]
-        subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
-        subdir_path = (
-            subdir_path[1:] if subdir_path.startswith("/") else subdir_path
-        )
-        print("subdir_path and uid:", subdir_path, uid)
-        motion_seqs_dir = smplx_params_dir
-        motion_name = os.path.dirname(
-            motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
-        )
-        motion_name = os.path.basename(motion_name)
-        dump_image_dir = os.path.dirname(dump_image_path)
-        os.makedirs(dump_image_dir, exist_ok=True)
-        print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
-        dump_tmp_dir = dump_image_dir
-        shape_pose = pose_estimator(image_raw)
-        assert shape_pose.is_full_body, f"The input image is illegal, {shape_pose.msg}"
-        if os.path.exists(dump_video_path):
-            return dump_image_path, dump_video_path
-        source_size = cfg.source_size
-        render_size = cfg.render_size
-        render_fps = 30
-        aspect_standard = 5.0 / 3
-        motion_img_need_mask = cfg.get("motion_img_need_mask", False)  # False
-        vis_motion = cfg.get("vis_motion", False)  # False
-        input_np = cv2.imread(image_raw)
-        output_np = remove(input_np)
-        parsing_mask = output_np[:,:,3]
-        # prepare reference image
-        image, _, _ = infer_preprocess_image(
-            image_raw,
-            mask=parsing_mask,
-            intr=None,
-            pad_ratio=0,
-            bg_color=1.0,
-            max_tgt_size=896,
-            aspect_standard=aspect_standard,
-            enlarge_ratio=[1.0, 1.0],
-            render_tgt_size=source_size,
-            multiply=14,
-            need_mask=True,
-        )
-        try:
-            rgb = np.array(Image.open(image_path))
-            rgb = torch.from_numpy(rgb).permute(2, 0, 1)
-            bbox = face_detector.detect_face(rgb)
-            head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
-            head_rgb = head_rgb.permute(1, 2, 0)
-            src_head_rgb = head_rgb.cpu().numpy()
-        except:
-            print("w/o head input!")
-            src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
-        # resize to dino size
-        try:
-            src_head_rgb = cv2.resize(
-                src_head_rgb,
-                dsize=(cfg.src_head_size, cfg.src_head_size),
-                interpolation=cv2.INTER_AREA,
-            )  # resize to dino size
-        except:
-            src_head_rgb = np.zeros(
-                (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
-            )
-        src_head_rgb = (
-            torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
-        )  # [1, 3, H, W]
-        save_ref_img_path = os.path.join(
-            dump_tmp_dir, "output.png"
-        )
-        vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
-            np.uint8
-        )
-        Image.fromarray(vis_ref_img).save(save_ref_img_path)
-        # read motion seq
-        motion_name = os.path.dirname(
-            motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
-        )
-        motion_name = os.path.basename(motion_name)
-        motion_seq = prepare_motion_seqs(
-            motion_seqs_dir,
-            None,
-            save_root=dump_tmp_dir,
-            fps=30,
-            bg_color=1.0,
-            aspect_standard=aspect_standard,
-            enlarge_ratio=[1.0, 1, 0],
-            render_image_res=render_size,
-            multiply=16,
-            need_mask=motion_img_need_mask,
-            vis_motion=vis_motion,
-        )
-        camera_size = len(motion_seq["motion_seqs"])
-        shape_param = shape_pose.beta
-        device = "cuda"
-        dtype = torch.float32
-        shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
-        lhm.to(dtype)
-        smplx_params = motion_seq['smplx_params']
-        smplx_params['betas'] = shape_param.to(device)
-        gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
-            image.unsqueeze(0).to(device, dtype),
-            src_head_rgb.unsqueeze(0).to(device, dtype),
-            None,
-            None,
-            render_c2ws=motion_seq["render_c2ws"].to(device),
-            render_intrs=motion_seq["render_intrs"].to(device),
-            render_bg_colors=motion_seq["render_bg_colors"].to(device),
-            smplx_params={
-                k: v.to(device) for k, v in smplx_params.items()
-            },
-        )
-        # rendering !!!!
-        start_time = time.time()
-        batch_dict = dict()
-        batch_size = 40  # avoid memeory out!
-        for batch_i in range(0, camera_size, batch_size):
-            with torch.no_grad():
-                # TODO check device and dtype
-                # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
-                keys = [
-                    "root_pose",
-                    "body_pose",
-                    "jaw_pose",
-                    "leye_pose",
-                    "reye_pose",
-                    "lhand_pose",
-                    "rhand_pose",
-                    "trans",
-                    "focal",
-                    "princpt",
-                    "img_size_wh",
-                    "expr",
-                ]
-                batch_smplx_params = dict()
-                batch_smplx_params["betas"] = shape_param.to(device)
-                batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
-                for key in keys:
-                    batch_smplx_params[key] = motion_seq["smplx_params"][key][
-                        :, batch_i : batch_i + batch_size
-                    ].to(device)
-                res = lhm.animation_infer(gs_model_list, query_points, batch_smplx_params,
-                    render_c2ws=motion_seq["render_c2ws"][
-                        :, batch_i : batch_i + batch_size
-                    ].to(device),
-                    render_intrs=motion_seq["render_intrs"][
-                        :, batch_i : batch_i + batch_size
-                    ].to(device),
-                    render_bg_colors=motion_seq["render_bg_colors"][
-                        :, batch_i : batch_i + batch_size
-                    ].to(device),
-                )
-            for accumulate_key in ["comp_rgb", "comp_mask"]:
-                if accumulate_key not in batch_dict:
-                    batch_dict[accumulate_key] = []
-                batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
-            del res
-            torch.cuda.empty_cache()
-        for accumulate_key in ["comp_rgb", "comp_mask"]:
-            batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
-        print(f"time elapsed: {time.time() - start_time}")
-        rgb = batch_dict["comp_rgb"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
-        mask = batch_dict["comp_mask"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
-        mask[mask < 0.5] = 0.0
-        rgb = rgb * mask + (1 - mask) * 1
-        rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
-        if vis_motion:
-            # print(rgb.shape, motion_seq["vis_motion_render"].shape)
-            vis_ref_img = np.tile(
-                cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
-                    None, :, :, :
-                ],
-                (rgb.shape[0], 1, 1, 1),
-            )
-            rgb = np.concatenate(
-                [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
-            )
-        os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
-        images_to_video(
-            rgb,
-            output_path=dump_video_path,
-            fps=render_fps,
-            gradio_codec=False,
-            verbose=True,
-        )
-        # self.infer_single(
-        #     image_path,
-        #     motion_seqs_dir=motion_seqs_dir,
-        #     motion_img_dir=None,
-        #     motion_video_read_fps=30,
-        #     export_video=False,
-        #     export_mesh=False,
-        #     dump_tmp_dir=dump_image_dir,
-        #     dump_image_dir=dump_image_dir,
-        #     dump_video_path=dump_video_path,
-        #     shape_param=shape_pose.beta,
-        # )
-        # status = spaces.GPU(infer_impl(
-        #     gradio_demo_image=image_raw,
-        #     gradio_motion_file=smplx_params_dir,
-        #     gradio_masked_image=dump_image_path,
-        #     gradio_video_save_path=dump_video_path
-        # ))
-        return dump_image_path, dump_video_path
-        # if status:
-        #     return dump_image_path, dump_video_path
-        # else:
-        #     return None, None
-    _TITLE = '''LHM: Large Animatable Human Model'''
-    _DESCRIPTION = '''
-        <strong>Reconstruct a human avatar in 0.2 seconds with A100!</strong>
-    '''
-    with gr.Blocks(analytics_enabled=False) as demo:
-        # </div>
-        logo_url = "./assets/rgba_logo_new.png"
-        logo_base64 = get_image_base64(logo_url)
-        gr.HTML(
-            f"""
-            <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-            <div>
-                <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1>
-            </div>
-            </div>
-            """
-        )
-        gr.HTML(
-            """<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors.</h4></p>"""
-        )
-        # DISPLAY
-        with gr.Row():
-            with gr.Column(variant='panel', scale=1):
-                with gr.Tabs(elem_id="openlrm_input_image"):
-                    with gr.TabItem('Input Image'):
-                        with gr.Row():
-                            input_image = gr.Image(label="Input Image", image_mode="RGBA", height=480, width=270, sources="upload", type="numpy", elem_id="content_image")
-                # EXAMPLES
-                with gr.Row():
-                    examples = [
-                        ['assets/sample_input/joker.jpg'],
-                        ['assets/sample_input/anime.png'],
-                        ['assets/sample_input/basket.png'],
-                        ['assets/sample_input/ai_woman1.JPG'],
-                        ['assets/sample_input/anime2.JPG'],
-                        ['assets/sample_input/anime3.JPG'],
-                        ['assets/sample_input/boy1.png'],
-                        ['assets/sample_input/choplin.jpg'],
-                        ['assets/sample_input/eins.JPG'],
-                        ['assets/sample_input/girl1.png'],
-                        ['assets/sample_input/girl2.png'],
-                        ['assets/sample_input/robot.jpg'],
-                    ]
-                    gr.Examples(
-                        examples=examples,
-                        inputs=[input_image],
-                        examples_per_page=20,
-                    )
-            with gr.Column():
-                with gr.Tabs(elem_id="openlrm_input_video"):
-                    with gr.TabItem('Input Video'):
-                        with gr.Row():
-                            video_input = gr.Video(label="Input Video",height=480, width=270, interactive=False)
-                examples = [
-                    # './assets/sample_motion/danaotiangong/danaotiangong_origin.mp4',
-                    './assets/sample_motion/ex5/ex5_origin.mp4',
-                    './assets/sample_motion/girl2/girl2_origin.mp4',
-                    './assets/sample_motion/jntm/jntm_origin.mp4',
-                    './assets/sample_motion/mimo1/mimo1_origin.mp4',
-                    './assets/sample_motion/mimo2/mimo2_origin.mp4',
-                    './assets/sample_motion/mimo4/mimo4_origin.mp4',
-                    './assets/sample_motion/mimo5/mimo5_origin.mp4',
-                    './assets/sample_motion/mimo6/mimo6_origin.mp4',
-                    './assets/sample_motion/nezha/nezha_origin.mp4',
-                    './assets/sample_motion/taiji/taiji_origin.mp4'
-                ]
-                gr.Examples(
-                    examples=examples,
-                    inputs=[video_input],
-                    examples_per_page=20,
-                )
-            with gr.Column(variant='panel', scale=1):
-                with gr.Tabs(elem_id="openlrm_processed_image"):
-                    with gr.TabItem('Processed Image'):
-                        with gr.Row():
-                            processed_image = gr.Image(label="Processed Image", image_mode="RGBA", type="filepath", elem_id="processed_image", height=480, width=270, interactive=False)
-            with gr.Column(variant='panel', scale=1):
-                with gr.Tabs(elem_id="openlrm_render_video"):
-                    with gr.TabItem('Rendered Video'):
-                        with gr.Row():
-                            output_video = gr.Video(label="Rendered Video", format="mp4", height=480, width=270, autoplay=True)
-        # SETTING
-        with gr.Row():
-            with gr.Column(variant='panel', scale=1):
-                submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary')
-        working_dir = gr.State()
-        submit.click(
-            fn=assert_input_image,
-            inputs=[input_image],
-            queue=False,
-        ).success(
-            fn=prepare_working_dir,
-            outputs=[working_dir],
-            queue=False,
-        ).success(
-            fn=core_fn,
-            inputs=[input_image, video_input, working_dir], # video_params refer to smpl dir
-            outputs=[processed_image, output_video],
-        )
-        demo.queue()
-        demo.launch()
-def launch_gradio_app():
-    os.environ.update({
-        "APP_ENABLED": "1",
-        "APP_MODEL_NAME": "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/",
-        "APP_INFER": "./configs/inference/human-lrm-500M.yaml",
-        "APP_TYPE": "infer.human_lrm",
-        "NUMBA_THREADING_LAYER": 'omp',
-    })
-    # from LHM.runners import REGISTRY_RUNNERS
-    # RunnerClass = REGISTRY_RUNNERS[os.getenv("APP_TYPE")]
-    # with RunnerClass() as runner:
-    #     runner.to('cuda')
-    #     demo_lhm(infer_impl=runner.infer)
-    facedetector = VGGHeadDetector(
-        "./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd",
-        device='cpu',
-    )
-    facedetector.to('cuda')
-    pose_estimator = PoseEstimator(
-        "./pretrained_models/human_model_files/", device='cpu'
-    )
-    pose_estimator.to('cuda')
-    pose_estimator.device = 'cuda'
-    cfg, cfg_train = parse_configs()
-    lhm = _build_model(cfg)
-    lhm.to('cuda')
-    demo_lhm(pose_estimator, facedetector, lhm, cfg)
-if __name__ == '__main__':
-    # launch_pretrained()
-    # launch_env_not_compile_with_cuda()
-    # os.system("rm -rf /data-nvme/zerogpu-offload/")
-    launch_gradio_app()
-# import gradio as gr
-# def greet(name):
-#     return "Hello " + name + "!!"
-# demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-# demo.launch()

 # limitations under the License.
+# import os
+# os.system("rm -rf /data-nvme/zerogpu-offload/")
+# import cv2
+# import time
+# from PIL import Image
+# import numpy as np
+# import gradio as gr
+# import base64
+# import spaces
+# import torch
+# torch._dynamo.config.disable = True
+# import subprocess
+# import os
+# import argparse
+# from omegaconf import OmegaConf
+# from rembg import remove
+# from engine.pose_estimation.pose_estimator import PoseEstimator
+# from LHM.utils.face_detector import VGGHeadDetector
+# from LHM.utils.hf_hub import wrap_model_hub
+# from LHM.runners.infer.utils import (
+#     calc_new_tgt_size_by_aspect,
+#     center_crop_according_to_mask,
+#     prepare_motion_seqs,
+#     resize_image_keepaspect_np,
+# )
+# from engine.SegmentAPI.base import Bbox
+# def get_bbox(mask):
+#     height, width = mask.shape
+#     pha = mask / 255.0
+#     pha[pha < 0.5] = 0.0
+#     pha[pha >= 0.5] = 1.0
+#     # obtain bbox
+#     _h, _w = np.where(pha == 1)
+#     whwh = [
+#         _w.min().item(),
+#         _h.min().item(),
+#         _w.max().item(),
+#         _h.max().item(),
+#     ]
+#     box = Bbox(whwh)
+#     # scale box to 1.05
+#     scale_box = box.scale(1.1, width=width, height=height)
+#     return scale_box
+# def infer_preprocess_image(
+#     rgb_path,
+#     mask,
+#     intr,
+#     pad_ratio,
+#     bg_color,
+#     max_tgt_size,
+#     aspect_standard,
+#     enlarge_ratio,
+#     render_tgt_size,
+#     multiply,
+#     need_mask=True,
+# ):
+#     """inferece
+#     image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
+#                                         max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
+#                                         render_tgt_size=source_size, multiply=14, need_mask=True)
+#     """
+#     rgb = np.array(Image.open(rgb_path))
+#     rgb_raw = rgb.copy()
+#     bbox = get_bbox(mask)
+#     bbox_list = bbox.get_box()
+#     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#     h, w, _ = rgb.shape
+#     assert w < h
+#     cur_ratio = h / w
+#     scale_ratio = cur_ratio / aspect_standard
+#     target_w = int(min(w * scale_ratio, h))
+#     offset_w = (target_w - w) // 2
+#     # resize to target ratio.
+#     if offset_w > 0:
+#         rgb = np.pad(
+#             rgb,
+#             ((0, 0), (offset_w, offset_w), (0, 0)),
+#             mode="constant",
+#             constant_values=255,
+#         )
+#         mask = np.pad(
+#             mask,
+#             ((0, 0), (offset_w, offset_w)),
+#             mode="constant",
+#             constant_values=0,
+#         )
+#     else:
+#         offset_w = -offset_w
+#         rgb = rgb[:,offset_w:-offset_w,:]
+#         mask = mask[:,offset_w:-offset_w]
+#     # resize to target ratio.
+#     rgb = np.pad(
+#         rgb,
+#         ((0, 0), (offset_w, offset_w), (0, 0)),
+#         mode="constant",
+#         constant_values=255,
+#     )
+#     mask = np.pad(
+#         mask,
+#         ((0, 0), (offset_w, offset_w)),
+#         mode="constant",
+#         constant_values=0,
+#     )
+#     rgb = rgb / 255.0  # normalize to [0, 1]
+#     mask = mask / 255.0
+#     mask = (mask > 0.5).astype(np.float32)
+#     rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
+#     # resize to specific size require by preprocessor of smplx-estimator.
+#     rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
+#     mask = resize_image_keepaspect_np(mask, max_tgt_size)
+#     # crop image to enlarge human area.
+#     rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
+#         rgb, mask, aspect_standard, enlarge_ratio
+#     )
+#     if intr is not None:
+#         intr[0, 2] -= offset_x
+#         intr[1, 2] -= offset_y
+#     # resize to render_tgt_size for training
+#     tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
+#         cur_hw=rgb.shape[:2],
+#         aspect_standard=aspect_standard,
+#         tgt_size=render_tgt_size,
+#         multiply=multiply,
+#     )
+#     rgb = cv2.resize(
+#         rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#     mask = cv2.resize(
+#         mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#     if intr is not None:
+#         # ******************** Merge *********************** #
+#         intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
+#         assert (
+#             abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
+#         ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
+#         assert (
+#             abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
+#         ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
+#         # ******************** Merge *********************** #
+#         intr[0, 2] = rgb.shape[1] // 2
+#         intr[1, 2] = rgb.shape[0] // 2
+#     rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+#     mask = (
+#         torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
+#     )  # [1, 1, H, W]
+#     return rgb, mask, intr
+# def parse_configs():
+#     parser = argparse.ArgumentParser()
+#     parser.add_argument("--config", type=str)
+#     parser.add_argument("--infer", type=str)
+#     args, unknown = parser.parse_known_args()
+#     cfg = OmegaConf.create()
+#     cli_cfg = OmegaConf.from_cli(unknown)
+#     # parse from ENV
+#     if os.environ.get("APP_INFER") is not None:
+#         args.infer = os.environ.get("APP_INFER")
+#     if os.environ.get("APP_MODEL_NAME") is not None:
+#         cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
+#     args.config = args.infer if args.config is None else args.config
+#     if args.config is not None:
+#         cfg_train = OmegaConf.load(args.config)
+#         cfg.source_size = cfg_train.dataset.source_image_res
+#         try:
+#             cfg.src_head_size = cfg_train.dataset.src_head_size
+#         except:
+#             cfg.src_head_size = 112
+#         cfg.render_size = cfg_train.dataset.render_image.high
+#         _relative_path = os.path.join(
+#             cfg_train.experiment.parent,
+#             cfg_train.experiment.child,
+#             os.path.basename(cli_cfg.model_name).split("_")[-1],
+#         )
+#         cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
+#         cfg.image_dump = os.path.join("exps", "images", _relative_path)
+#         cfg.video_dump = os.path.join("exps", "videos", _relative_path)  # output path
+#     if args.infer is not None:
+#         cfg_infer = OmegaConf.load(args.infer)
+#         cfg.merge_with(cfg_infer)
+#         cfg.setdefault(
+#             "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
+#         )
+#         cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
+#         cfg.setdefault(
+#             "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
+#         )
+#         cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
+#     cfg.motion_video_read_fps = 6
+#     cfg.merge_with(cli_cfg)
+#     cfg.setdefault("logger", "INFO")
+#     assert cfg.model_name is not None, "model_name is required"
+#     return cfg, cfg_train
+# def _build_model(cfg):
+#     from LHM.models import model_dict
+#     hf_model_cls = wrap_model_hub(model_dict["human_lrm_sapdino_bh_sd3_5"])
+#     model = hf_model_cls.from_pretrained(cfg.model_name)
+#     return model
+# def launch_pretrained():
+#     from huggingface_hub import snapshot_download, hf_hub_download
+#     hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='assets.tar', local_dir="./")
+#     os.system("tar -xvf assets.tar && rm assets.tar")
+#     hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
+#     os.system("tar -xvf LHM-0.5B.tar && rm LHM-0.5B.tar")
+#     hf_hub_download(repo_id="DyrusQZ/LHM_Runtime", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
+#     os.system("tar -xvf LHM_prior_model.tar && rm LHM_prior_model.tar")
+# def launch_env_not_compile_with_cuda():
+#     os.system("pip install chumpy")
+#     os.system("pip uninstall -y basicsr")
+#     os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
+#     # os.system("pip install -e ./third_party/sam2")
+#     os.system("pip install numpy==1.23.0")
+#     # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
+#     # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
+#     # os.system("pip install git+https://github.com/camenduru/simple-knn/")
+#     os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html")
+# def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
+#     '''Inference code avoid repeat forward.
+#     '''
+#     render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int(
+#         render_intrs[0, 0, 0, 2] * 2
+#     )
+#     # render target views
+#     render_res_list = []
+#     num_views = render_c2ws.shape[1]
+#     start_time = time.time()
+#     # render target views
+#     render_res_list = []
+#     for view_idx in range(num_views):
+#         render_res = renderer.forward_animate_gs(
+#             gs_model_list,
+#             query_points,
+#             renderer.get_single_view_smpl_data(smplx_params, view_idx),
+#             render_c2ws[:, view_idx : view_idx + 1],
+#             render_intrs[:, view_idx : view_idx + 1],
+#             render_h,
+#             render_w,
+#             render_bg_colors[:, view_idx : view_idx + 1],
+#         )
+#         render_res_list.append(render_res)
+#     print(
+#         f"time elpased(animate gs model per frame):{(time.time() -  start_time)/num_views}"
+#     )
+#     out = defaultdict(list)
+#     for res in render_res_list:
+#         for k, v in res.items():
+#             if isinstance(v[0], torch.Tensor):
+#                 out[k].append(v.detach().cpu())
+#             else:
+#                 out[k].append(v)
+#     for k, v in out.items():
+#         # print(f"out key:{k}")
+#         if isinstance(v[0], torch.Tensor):
+#             out[k] = torch.concat(v, dim=1)
+#             if k in ["comp_rgb", "comp_mask", "comp_depth"]:
+#                 out[k] = out[k][0].permute(
+#                     0, 2, 3, 1
+#                 )  # [1, Nv, 3, H, W] -> [Nv, 3, H, W] - > [Nv, H, W, 3]
+#         else:
+#             out[k] = v
+#     return out
+# def assert_input_image(input_image):
+#     if input_image is None:
+#         raise gr.Error("No image selected or uploaded!")
+# def prepare_working_dir():
+#     import tempfile
+#     working_dir = tempfile.TemporaryDirectory()
+#     return working_dir
+# def init_preprocessor():
+#     from LHM.utils.preprocess import Preprocessor
+#     global preprocessor
+#     preprocessor = Preprocessor()
+# def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool, working_dir):
+#     image_raw = os.path.join(working_dir.name, "raw.png")
+#     with Image.fromarray(image_in) as img:
+#         img.save(image_raw)
+#     image_out = os.path.join(working_dir.name, "rembg.png")
+#     success = preprocessor.preprocess(image_path=image_raw, save_path=image_out, rmbg=remove_bg, recenter=recenter)
+#     assert success, f"Failed under preprocess_fn!"
+#     return image_out
+# def get_image_base64(path):
+#     with open(path, "rb") as image_file:
+#         encoded_string = base64.b64encode(image_file.read()).decode()
+#     return f"data:image/png;base64,{encoded_string}"
+# def demo_lhm(pose_estimator, face_detector, lhm, cfg):
+#     @spaces.GPU
+#     def core_fn(image: str, video_params, working_dir):
+#         image_raw = os.path.join(working_dir.name, "raw.png")
+#         with Image.fromarray(image) as img:
+#             img.save(image_raw)
+#         base_vid = os.path.basename(video_params).split("_")[0]
+#         smplx_params_dir = os.path.join("./assets/sample_motion", base_vid, "smplx_params")
+#         dump_video_path = os.path.join(working_dir.name, "output.mp4")
+#         dump_image_path = os.path.join(working_dir.name, "output.png")
+#         # prepare dump paths
+#         omit_prefix = os.path.dirname(image_raw)
+#         image_name = os.path.basename(image_raw)
+#         uid = image_name.split(".")[0]
+#         subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
+#         subdir_path = (
+#             subdir_path[1:] if subdir_path.startswith("/") else subdir_path
+#         )
+#         print("subdir_path and uid:", subdir_path, uid)
+#         motion_seqs_dir = smplx_params_dir
+#         motion_name = os.path.dirname(
+#             motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
+#         )
+#         motion_name = os.path.basename(motion_name)
+#         dump_image_dir = os.path.dirname(dump_image_path)
+#         os.makedirs(dump_image_dir, exist_ok=True)
+#         print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
+#         dump_tmp_dir = dump_image_dir
+#         shape_pose = pose_estimator(image_raw)
+#         assert shape_pose.is_full_body, f"The input image is illegal, {shape_pose.msg}"
+#         if os.path.exists(dump_video_path):
+#             return dump_image_path, dump_video_path
+#         source_size = cfg.source_size
+#         render_size = cfg.render_size
+#         render_fps = 30
+#         aspect_standard = 5.0 / 3
+#         motion_img_need_mask = cfg.get("motion_img_need_mask", False)  # False
+#         vis_motion = cfg.get("vis_motion", False)  # False
+#         input_np = cv2.imread(image_raw)
+#         output_np = remove(input_np)
+#         # cv2.imwrite("./vis.png", output_np)
+#         parsing_mask = output_np[:,:,3]
+#         # prepare reference image
+#         image, _, _ = infer_preprocess_image(
+#             image_raw,
+#             mask=parsing_mask,
+#             intr=None,
+#             pad_ratio=0,
+#             bg_color=1.0,
+#             max_tgt_size=896,
+#             aspect_standard=aspect_standard,
+#             enlarge_ratio=[1.0, 1.0],
+#             render_tgt_size=source_size,
+#             multiply=14,
+#             need_mask=True,
+#         )
+#         try:
+#             rgb = np.array(Image.open(image_path))
+#             rgb = torch.from_numpy(rgb).permute(2, 0, 1)
+#             bbox = face_detector.detect_face(rgb)
+#             head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
+#             head_rgb = head_rgb.permute(1, 2, 0)
+#             src_head_rgb = head_rgb.cpu().numpy()
+#         except:
+#             print("w/o head input!")
+#             src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
+#         # resize to dino size
+#         try:
+#             src_head_rgb = cv2.resize(
+#                 src_head_rgb,
+#                 dsize=(cfg.src_head_size, cfg.src_head_size),
+#                 interpolation=cv2.INTER_AREA,
+#             )  # resize to dino size
+#         except:
+#             src_head_rgb = np.zeros(
+#                 (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
+#             )
+#         src_head_rgb = (
+#             torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
+#         )  # [1, 3, H, W]
+#         save_ref_img_path = os.path.join(
+#             dump_tmp_dir, "output.png"
+#         )
+#         vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
+#             np.uint8
+#         )
+#         Image.fromarray(vis_ref_img).save(save_ref_img_path)
+#         # read motion seq
+#         motion_name = os.path.dirname(
+#             motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
+#         )
+#         motion_name = os.path.basename(motion_name)
+#         motion_seq = prepare_motion_seqs(
+#             motion_seqs_dir,
+#             None,
+#             save_root=dump_tmp_dir,
+#             fps=30,
+#             bg_color=1.0,
+#             aspect_standard=aspect_standard,
+#             enlarge_ratio=[1.0, 1, 0],
+#             render_image_res=render_size,
+#             multiply=16,
+#             need_mask=motion_img_need_mask,
+#             vis_motion=vis_motion,
+#         )
+#         camera_size = len(motion_seq["motion_seqs"])
+#         shape_param = shape_pose.beta
+#         device = "cuda"
+#         dtype = torch.float32
+#         shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
+#         lhm.to(dtype)
+#         smplx_params = motion_seq['smplx_params']
+#         smplx_params['betas'] = shape_param.to(device)
+#         gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
+#             image.unsqueeze(0).to(device, dtype),
+#             src_head_rgb.unsqueeze(0).to(device, dtype),
+#             None,
+#             None,
+#             render_c2ws=motion_seq["render_c2ws"].to(device),
+#             render_intrs=motion_seq["render_intrs"].to(device),
+#             render_bg_colors=motion_seq["render_bg_colors"].to(device),
+#             smplx_params={
+#                 k: v.to(device) for k, v in smplx_params.items()
+#             },
+#         )
+#         # rendering !!!!
+#         start_time = time.time()
+#         batch_dict = dict()
+#         batch_size = 40  # avoid memeory out!
+#         for batch_i in range(0, camera_size, batch_size):
+#             with torch.no_grad():
+#                 # TODO check device and dtype
+#                 # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
+#                 keys = [
+#                     "root_pose",
+#                     "body_pose",
+#                     "jaw_pose",
+#                     "leye_pose",
+#                     "reye_pose",
+#                     "lhand_pose",
+#                     "rhand_pose",
+#                     "trans",
+#                     "focal",
+#                     "princpt",
+#                     "img_size_wh",
+#                     "expr",
+#                 ]
+#                 batch_smplx_params = dict()
+#                 batch_smplx_params["betas"] = shape_param.to(device)
+#                 batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
+#                 for key in keys:
+#                     batch_smplx_params[key] = motion_seq["smplx_params"][key][
+#                         :, batch_i : batch_i + batch_size
+#                     ].to(device)
+#                 res = lhm.animation_infer(gs_model_list, query_points, batch_smplx_params,
+#                     render_c2ws=motion_seq["render_c2ws"][
+#                         :, batch_i : batch_i + batch_size
+#                     ].to(device),
+#                     render_intrs=motion_seq["render_intrs"][
+#                         :, batch_i : batch_i + batch_size
+#                     ].to(device),
+#                     render_bg_colors=motion_seq["render_bg_colors"][
+#                         :, batch_i : batch_i + batch_size
+#                     ].to(device),
+#                 )
+#             for accumulate_key in ["comp_rgb", "comp_mask"]:
+#                 if accumulate_key not in batch_dict:
+#                     batch_dict[accumulate_key] = []
+#                 batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
+#             del res
+#             torch.cuda.empty_cache()
+#         for accumulate_key in ["comp_rgb", "comp_mask"]:
+#             batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
+#         print(f"time elapsed: {time.time() - start_time}")
+#         rgb = batch_dict["comp_rgb"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
+#         mask = batch_dict["comp_mask"].detach().cpu().numpy()  # [Nv, H, W, 3], 0-1
+#         mask[mask < 0.5] = 0.0
+#         rgb = rgb * mask + (1 - mask) * 1
+#         rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
+#         if vis_motion:
+#             # print(rgb.shape, motion_seq["vis_motion_render"].shape)
+#             vis_ref_img = np.tile(
+#                 cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
+#                     None, :, :, :
+#                 ],
+#                 (rgb.shape[0], 1, 1, 1),
+#             )
+#             rgb = np.concatenate(
+#                 [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
+#             )
+#         os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
+#         images_to_video(
+#             rgb,
+#             output_path=dump_video_path,
+#             fps=render_fps,
+#             gradio_codec=False,
+#             verbose=True,
+#         )
+#         # self.infer_single(
+#         #     image_path,
+#         #     motion_seqs_dir=motion_seqs_dir,
+#         #     motion_img_dir=None,
+#         #     motion_video_read_fps=30,
+#         #     export_video=False,
+#         #     export_mesh=False,
+#         #     dump_tmp_dir=dump_image_dir,
+#         #     dump_image_dir=dump_image_dir,
+#         #     dump_video_path=dump_video_path,
+#         #     shape_param=shape_pose.beta,
+#         # )
+#         # status = spaces.GPU(infer_impl(
+#         #     gradio_demo_image=image_raw,
+#         #     gradio_motion_file=smplx_params_dir,
+#         #     gradio_masked_image=dump_image_path,
+#         #     gradio_video_save_path=dump_video_path
+#         # ))
+#         return dump_image_path, dump_video_path
+#         # if status:
+#         #     return dump_image_path, dump_video_path
+#         # else:
+#         #     return None, None
+#     _TITLE = '''LHM: Large Animatable Human Model'''
+#     _DESCRIPTION = '''
+#         <strong>Reconstruct a human avatar in 0.2 seconds with A100!</strong>
+#     '''
+#     with gr.Blocks(analytics_enabled=False) as demo:
+#         # </div>
+#         logo_url = "./assets/rgba_logo_new.png"
+#         logo_base64 = get_image_base64(logo_url)
+#         gr.HTML(
+#             f"""
+#             <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
+#             <div>
+#                 <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1>
+#             </div>
+#             </div>
+#             """
+#         )
+#         gr.HTML(
+#             """<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors.</h4></p>"""
+#         )
+#         # DISPLAY
+#         with gr.Row():
+#             with gr.Column(variant='panel', scale=1):
+#                 with gr.Tabs(elem_id="openlrm_input_image"):
+#                     with gr.TabItem('Input Image'):
+#                         with gr.Row():
+#                             input_image = gr.Image(label="Input Image", image_mode="RGBA", height=480, width=270, sources="upload", type="numpy", elem_id="content_image")
+#                 # EXAMPLES
+#                 with gr.Row():
+#                     examples = [
+#                         ['assets/sample_input/joker.jpg'],
+#                         ['assets/sample_input/anime.png'],
+#                         ['assets/sample_input/basket.png'],
+#                         ['assets/sample_input/ai_woman1.JPG'],
+#                         ['assets/sample_input/anime2.JPG'],
+#                         ['assets/sample_input/anime3.JPG'],
+#                         ['assets/sample_input/boy1.png'],
+#                         ['assets/sample_input/choplin.jpg'],
+#                         ['assets/sample_input/eins.JPG'],
+#                         ['assets/sample_input/girl1.png'],
+#                         ['assets/sample_input/girl2.png'],
+#                         ['assets/sample_input/robot.jpg'],
+#                     ]
+#                     gr.Examples(
+#                         examples=examples,
+#                         inputs=[input_image],
+#                         examples_per_page=20,
+#                     )
+#             with gr.Column():
+#                 with gr.Tabs(elem_id="openlrm_input_video"):
+#                     with gr.TabItem('Input Video'):
+#                         with gr.Row():
+#                             video_input = gr.Video(label="Input Video",height=480, width=270, interactive=False)
+#                 examples = [
+#                     # './assets/sample_motion/danaotiangong/danaotiangong_origin.mp4',
+#                     './assets/sample_motion/ex5/ex5_origin.mp4',
+#                     './assets/sample_motion/girl2/girl2_origin.mp4',
+#                     './assets/sample_motion/jntm/jntm_origin.mp4',
+#                     './assets/sample_motion/mimo1/mimo1_origin.mp4',
+#                     './assets/sample_motion/mimo2/mimo2_origin.mp4',
+#                     './assets/sample_motion/mimo4/mimo4_origin.mp4',
+#                     './assets/sample_motion/mimo5/mimo5_origin.mp4',
+#                     './assets/sample_motion/mimo6/mimo6_origin.mp4',
+#                     './assets/sample_motion/nezha/nezha_origin.mp4',
+#                     './assets/sample_motion/taiji/taiji_origin.mp4'
+#                 ]
+#                 gr.Examples(
+#                     examples=examples,
+#                     inputs=[video_input],
+#                     examples_per_page=20,
+#                 )
+#             with gr.Column(variant='panel', scale=1):
+#                 with gr.Tabs(elem_id="openlrm_processed_image"):
+#                     with gr.TabItem('Processed Image'):
+#                         with gr.Row():
+#                             processed_image = gr.Image(label="Processed Image", image_mode="RGBA", type="filepath", elem_id="processed_image", height=480, width=270, interactive=False)
+#             with gr.Column(variant='panel', scale=1):
+#                 with gr.Tabs(elem_id="openlrm_render_video"):
+#                     with gr.TabItem('Rendered Video'):
+#                         with gr.Row():
+#                             output_video = gr.Video(label="Rendered Video", format="mp4", height=480, width=270, autoplay=True)
+#         # SETTING
+#         with gr.Row():
+#             with gr.Column(variant='panel', scale=1):
+#                 submit = gr.Button('Generate', elem_id="openlrm_generate", variant='primary')
+#         working_dir = gr.State()
+#         submit.click(
+#             fn=assert_input_image,
+#             inputs=[input_image],
+#             queue=False,
+#         ).success(
+#             fn=prepare_working_dir,
+#             outputs=[working_dir],
+#             queue=False,
+#         ).success(
+#             fn=core_fn,
+#             inputs=[input_image, video_input, working_dir], # video_params refer to smpl dir
+#             outputs=[processed_image, output_video],
+#         )
+#         demo.queue()
+#         demo.launch()
+# def launch_gradio_app():
+#     os.environ.update({
+#         "APP_ENABLED": "1",
+#         "APP_MODEL_NAME": "./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/",
+#         "APP_INFER": "./configs/inference/human-lrm-500M.yaml",
+#         "APP_TYPE": "infer.human_lrm",
+#         "NUMBA_THREADING_LAYER": 'omp',
+#     })
+#     # from LHM.runners import REGISTRY_RUNNERS
+#     # RunnerClass = REGISTRY_RUNNERS[os.getenv("APP_TYPE")]
+#     # with RunnerClass() as runner:
+#     #     runner.to('cuda')
+#     #     demo_lhm(infer_impl=runner.infer)
+#     facedetector = VGGHeadDetector(
+#         "./pretrained_models/gagatracker/vgghead/vgg_heads_l.trcd",
+#         device='cpu',
+#     )
+#     facedetector.to('cuda')
+#     pose_estimator = PoseEstimator(
+#         "./pretrained_models/human_model_files/", device='cpu'
+#     )
+#     pose_estimator.to('cuda')
+#     pose_estimator.device = 'cuda'
+#     cfg, cfg_train = parse_configs()
+#     lhm = _build_model(cfg)
+#     lhm.to('cuda')
+#     demo_lhm(pose_estimator, facedetector, lhm, cfg)
+# if __name__ == '__main__':
+#     # launch_pretrained()
+#     # launch_env_not_compile_with_cuda()
+#     launch_gradio_app()
+import gradio as gr
+def greet(name):
+    return "Hello " + name + "!!"
+demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+demo.launch()

wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:18969ebb896d921bc4c54630e5edf990898ee9505c2cc46c4feb3486a959ce54
-size 3373299

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0f3184936fcc68139947905916039ddf5973c5e3c0bd2d4680565bf89934e22
+size 3408819

wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21ffecc42d12fe431e71ded0297c2b3ab4586b668a432d41e58d7440a15b274d
-size 3130569

 version https://git-lfs.github.com/spec/v1
+oid sha256:07408595ab166dfcc6cdd4422b8e94cb615abcf7c799ef0d43a66ad746f09373
+size 3182640