DyrusQZ committed
Commit 45a45e6
Parent(s): 57f7e9c

update half-body model

Files changed (1):
    app.py +152 -27
app.py CHANGED
@@ -70,6 +70,132 @@ def get_bbox(mask):
     scale_box = box.scale(1.1, width=width, height=height)
     return scale_box
 
+# def infer_preprocess_image(
+#     rgb_path,
+#     mask,
+#     intr,
+#     pad_ratio,
+#     bg_color,
+#     max_tgt_size,
+#     aspect_standard,
+#     enlarge_ratio,
+#     render_tgt_size,
+#     multiply,
+#     need_mask=True,
+# ):
+#     """inference
+#     image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
+#                                    max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
+#                                    render_tgt_size=source_size, multiply=14, need_mask=True)
+#
+#     """
+#
+#     rgb = np.array(Image.open(rgb_path))
+#     rgb_raw = rgb.copy()
+#
+#     bbox = get_bbox(mask)
+#     bbox_list = bbox.get_box()
+#
+#     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+#
+#     h, w, _ = rgb.shape
+#     assert w < h
+#     cur_ratio = h / w
+#     scale_ratio = cur_ratio / aspect_standard
+#
+#     target_w = int(min(w * scale_ratio, h))
+#     offset_w = (target_w - w) // 2
+#     # resize to target ratio.
+#     if offset_w > 0:
+#         rgb = np.pad(
+#             rgb,
+#             ((0, 0), (offset_w, offset_w), (0, 0)),
+#             mode="constant",
+#             constant_values=255,
+#         )
+#         mask = np.pad(
+#             mask,
+#             ((0, 0), (offset_w, offset_w)),
+#             mode="constant",
+#             constant_values=0,
+#         )
+#     else:
+#         offset_w = -offset_w
+#         rgb = rgb[:, offset_w:-offset_w, :]
+#         mask = mask[:, offset_w:-offset_w]
+#
+#     # resize to target ratio.
+#
+#     rgb = np.pad(
+#         rgb,
+#         ((0, 0), (offset_w, offset_w), (0, 0)),
+#         mode="constant",
+#         constant_values=255,
+#     )
+#
+#     mask = np.pad(
+#         mask,
+#         ((0, 0), (offset_w, offset_w)),
+#         mode="constant",
+#         constant_values=0,
+#     )
+#
+#     rgb = rgb / 255.0  # normalize to [0, 1]
+#     mask = mask / 255.0
+#
+#     mask = (mask > 0.5).astype(np.float32)
+#     rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
+#
+#     # resize to the specific size required by the smplx-estimator preprocessor.
+#     rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
+#     mask = resize_image_keepaspect_np(mask, max_tgt_size)
+#
+#     # crop image to enlarge human area.
+#     rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
+#         rgb, mask, aspect_standard, enlarge_ratio
+#     )
+#     if intr is not None:
+#         intr[0, 2] -= offset_x
+#         intr[1, 2] -= offset_y
+#
+#     # resize to render_tgt_size for training
+#
+#     tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
+#         cur_hw=rgb.shape[:2],
+#         aspect_standard=aspect_standard,
+#         tgt_size=render_tgt_size,
+#         multiply=multiply,
+#     )
+#
+#     rgb = cv2.resize(
+#         rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#     mask = cv2.resize(
+#         mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+#     )
+#
+#     if intr is not None:
+#
+#         # ******************** Merge *********************** #
+#         intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
+#         assert (
+#             abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
+#         ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
+#         assert (
+#             abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
+#         ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
+#
+#         # ******************** Merge *********************** #
+#         intr[0, 2] = rgb.shape[1] // 2
+#         intr[1, 2] = rgb.shape[0] // 2
+#
+#     rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+#     mask = (
+#         torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
+#     )  # [1, 1, H, W]
+#     return rgb, mask, intr
+
 def infer_preprocess_image(
     rgb_path,
     mask,
@@ -99,21 +225,24 @@ def infer_preprocess_image(
     rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
     mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
 
+
     h, w, _ = rgb.shape
     assert w < h
     cur_ratio = h / w
     scale_ratio = cur_ratio / aspect_standard
 
+
     target_w = int(min(w * scale_ratio, h))
-    offset_w = (target_w - w) // 2
-    # resize to target ratio.
-    if offset_w > 0:
+    if target_w - w > 0:
+        offset_w = (target_w - w) // 2
+
         rgb = np.pad(
             rgb,
             ((0, 0), (offset_w, offset_w), (0, 0)),
             mode="constant",
             constant_values=255,
         )
+
         mask = np.pad(
             mask,
             ((0, 0), (offset_w, offset_w)),
@@ -121,25 +250,22 @@
             constant_values=0,
         )
     else:
-        offset_w = -offset_w
-        rgb = rgb[:, offset_w:-offset_w, :]
-        mask = mask[:, offset_w:-offset_w]
+        target_h = w * aspect_standard
+        offset_h = int(target_h - h)
 
-    # resize to target ratio.
-
-    rgb = np.pad(
-        rgb,
-        ((0, 0), (offset_w, offset_w), (0, 0)),
-        mode="constant",
-        constant_values=255,
-    )
+        rgb = np.pad(
+            rgb,
+            ((offset_h, 0), (0, 0), (0, 0)),
+            mode="constant",
+            constant_values=255,
+        )
 
-    mask = np.pad(
-        mask,
-        ((0, 0), (offset_w, offset_w)),
-        mode="constant",
-        constant_values=0,
-    )
+        mask = np.pad(
+            mask,
+            ((offset_h, 0), (0, 0)),
+            mode="constant",
+            constant_values=0,
+        )
 
     rgb = rgb / 255.0  # normalize to [0, 1]
     mask = mask / 255.0
@@ -265,20 +391,19 @@ def launch_pretrained():
     from huggingface_hub import snapshot_download, hf_hub_download
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='assets.tar', local_dir="./")
     os.system("tar -xf assets.tar && rm assets.tar")
-    hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
-    os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
+    # hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM-0.5B.tar', local_dir="./")
+    # os.system("tar -xf LHM-0.5B.tar && rm LHM-0.5B.tar")
     hf_hub_download(repo_id="3DAIGC/LHM", repo_type='model', filename='LHM_prior_model.tar', local_dir="./")
     os.system("tar -xf LHM_prior_model.tar && rm LHM_prior_model.tar")
+    # replace the full-body weights with the half-body model
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='config.json', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
+    hf_hub_download(repo_id="3DAIGC/LHM-500M-HF", repo_type='model', filename='model.safetensors', local_dir="./exps/releases/video_human_benchmark/human-lrm-500M/step_060000/")
 
 def launch_env_not_compile_with_cuda():
     os.system("pip install chumpy")
     os.system("pip uninstall -y basicsr")
     os.system("pip install git+https://github.com/hitsz-zuoqi/BasicSR/")
     os.system("pip install numpy==1.23.0")
-    # os.system("pip install git+https://github.com/hitsz-zuoqi/sam2/")
-    # os.system("pip install git+https://github.com/ashawkey/diff-gaussian-rasterization/")
-    # os.system("pip install git+https://github.com/camenduru/simple-knn/")
-    # os.system("pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html")
 
 
 def animation_infer(renderer, gs_model_list, query_points, smplx_params, render_c2ws, render_intrs, render_bg_colors):
@@ -662,7 +787,7 @@ def demo_lhm(pose_estimator, face_detector, lhm, cfg):
     )
 
     gr.HTML(
-        """<p><h4 style="color: red;"> Note 1: Please provide a full-body image to avoid detection errors. We simplify the pipeline on Spaces: 1) using Rembg instead of SAM2; 2) limiting the output video length to 10 s. For the best visual quality, try the inference code on GitHub instead.</h4></p>"""
+        """<p><h4 style="color: red;"> Note 1: We are glad to announce that both full-body and half-body inputs are now supported! Try testing the robustness with half-body images.</h4></p>"""
     )
     gr.HTML(
         """<p><h4 style="color: green;"> Note 2: We have released ComfyUI nodes for LHM at https://github.com/aigc3d/LHM/tree/feat/comfyui, which support any character and any driving video as input. Try them!</h4></p>"""
    )
 