Spaces: svjack / (Runtime error)

DyrusQZ committed
Commit f5e714b · 1 Parent(s): b206b0b

move detail models in app

Files changed (1)
  1. app.py +319 -5
app.py CHANGED
@@ -25,6 +25,138 @@ import os
 from engine.pose_estimation.pose_estimator import PoseEstimator
 from LHM.utils.face_detector import VGGHeadDetector
 from LHM.utils.hf_hub import wrap_model_hub
+from LHM.runners.infer.utils import (
+    calc_new_tgt_size_by_aspect,
+    center_crop_according_to_mask,
+    prepare_motion_seqs,
+    resize_image_keepaspect_np,
+)
+
+def infer_preprocess_image(
+    rgb_path,
+    mask,
+    intr,
+    pad_ratio,
+    bg_color,
+    max_tgt_size,
+    aspect_standard,
+    enlarge_ratio,
+    render_tgt_size,
+    multiply,
+    need_mask=True,
+):
+    """inferece
+    image, _, _ = preprocess_image(image_path, mask_path=None, intr=None, pad_ratio=0, bg_color=1.0,
+                                   max_tgt_size=896, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
+                                   render_tgt_size=source_size, multiply=14, need_mask=True)
+
+    """
+
+    rgb = np.array(Image.open(rgb_path))
+    rgb_raw = rgb.copy()
+
+    bbox = get_bbox(mask)
+    bbox_list = bbox.get_box()
+
+    rgb = rgb[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+    mask = mask[bbox_list[1] : bbox_list[3], bbox_list[0] : bbox_list[2]]
+
+    h, w, _ = rgb.shape
+    assert w < h
+    cur_ratio = h / w
+    scale_ratio = cur_ratio / aspect_standard
+
+    target_w = int(min(w * scale_ratio, h))
+    offset_w = (target_w - w) // 2
+    # resize to target ratio.
+    if offset_w > 0:
+        rgb = np.pad(
+            rgb,
+            ((0, 0), (offset_w, offset_w), (0, 0)),
+            mode="constant",
+            constant_values=255,
+        )
+        mask = np.pad(
+            mask,
+            ((0, 0), (offset_w, offset_w)),
+            mode="constant",
+            constant_values=0,
+        )
+    else:
+        offset_w = -offset_w
+        rgb = rgb[:,offset_w:-offset_w,:]
+        mask = mask[:,offset_w:-offset_w]
+
+    # resize to target ratio.
+
+    rgb = np.pad(
+        rgb,
+        ((0, 0), (offset_w, offset_w), (0, 0)),
+        mode="constant",
+        constant_values=255,
+    )
+
+    mask = np.pad(
+        mask,
+        ((0, 0), (offset_w, offset_w)),
+        mode="constant",
+        constant_values=0,
+    )
+
+    rgb = rgb / 255.0 # normalize to [0, 1]
+    mask = mask / 255.0
+
+    mask = (mask > 0.5).astype(np.float32)
+    rgb = rgb[:, :, :3] * mask[:, :, None] + bg_color * (1 - mask[:, :, None])
+
+    # resize to specific size require by preprocessor of smplx-estimator.
+    rgb = resize_image_keepaspect_np(rgb, max_tgt_size)
+    mask = resize_image_keepaspect_np(mask, max_tgt_size)
+
+    # crop image to enlarge human area.
+    rgb, mask, offset_x, offset_y = center_crop_according_to_mask(
+        rgb, mask, aspect_standard, enlarge_ratio
+    )
+    if intr is not None:
+        intr[0, 2] -= offset_x
+        intr[1, 2] -= offset_y
+
+    # resize to render_tgt_size for training
+
+    tgt_hw_size, ratio_y, ratio_x = calc_new_tgt_size_by_aspect(
+        cur_hw=rgb.shape[:2],
+        aspect_standard=aspect_standard,
+        tgt_size=render_tgt_size,
+        multiply=multiply,
+    )
+
+    rgb = cv2.resize(
+        rgb, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+    )
+    mask = cv2.resize(
+        mask, dsize=(tgt_hw_size[1], tgt_hw_size[0]), interpolation=cv2.INTER_AREA
+    )
+
+    if intr is not None:
+
+        # ******************** Merge *********************** #
+        intr = scale_intrs(intr, ratio_x=ratio_x, ratio_y=ratio_y)
+        assert (
+            abs(intr[0, 2] * 2 - rgb.shape[1]) < 2.5
+        ), f"{intr[0, 2] * 2}, {rgb.shape[1]}"
+        assert (
+            abs(intr[1, 2] * 2 - rgb.shape[0]) < 2.5
+        ), f"{intr[1, 2] * 2}, {rgb.shape[0]}"
+
+        # ******************** Merge *********************** #
+        intr[0, 2] = rgb.shape[1] // 2
+        intr[1, 2] = rgb.shape[0] // 2
+
+    rgb = torch.from_numpy(rgb).float().permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W]
+    mask = (
+        torch.from_numpy(mask[:, :, None]).float().permute(2, 0, 1).unsqueeze(0)
+    ) # [1, 1, H, W]
+    return rgb, mask, intr
 
 def parse_configs():
 
@@ -193,11 +325,192 @@ def demo_lhm(pose_estimator, face_detector, lhm_model, cfg):
     motion_img_need_mask = cfg.get("motion_img_need_mask", False) # False
     vis_motion = cfg.get("vis_motion", False) # False
 
-    parsing_mask = parsing(image_raw)
-
-    input = cv2.imread(img_path)
-    output = remove(input)
-    alpha = output[:,:,3]
+
+    input_np = cv2.imread(image_raw)
+    output_np = remove(input_np)
+    parsing_mask = output_np[:,:,3]
+
+    # prepare reference image
+    image, _, _ = infer_preprocess_image(
+        image_raw,
+        mask=parsing_mask,
+        intr=None,
+        pad_ratio=0,
+        bg_color=1.0,
+        max_tgt_size=896,
+        aspect_standard=aspect_standard,
+        enlarge_ratio=[1.0, 1.0],
+        render_tgt_size=source_size,
+        multiply=14,
+        need_mask=True,
+    )
+
+    try:
+        rgb = np.array(Image.open(image_path))
+        rgb = torch.from_numpy(rgb).permute(2, 0, 1)
+        bbox = face_detector.detect_face(rgb)
+        head_rgb = rgb[:, int(bbox[1]) : int(bbox[3]), int(bbox[0]) : int(bbox[2])]
+        head_rgb = head_rgb.permute(1, 2, 0)
+        src_head_rgb = head_rgb.cpu().numpy()
+    except:
+        print("w/o head input!")
+        src_head_rgb = np.zeros((112, 112, 3), dtype=np.uint8)
+
+    # resize to dino size
+    try:
+        src_head_rgb = cv2.resize(
+            src_head_rgb,
+            dsize=(cfg.src_head_size, cfg.src_head_size),
+            interpolation=cv2.INTER_AREA,
+        ) # resize to dino size
+    except:
+        src_head_rgb = np.zeros(
+            (cfg.src_head_size, cfg.src_head_size, 3), dtype=np.uint8
+        )
+
+    src_head_rgb = (
+        torch.from_numpy(src_head_rgb / 255.0).float().permute(2, 0, 1).unsqueeze(0)
+    ) # [1, 3, H, W]
+
+    save_ref_img_path = os.path.join(
+        dump_tmp_dir, "output.png"
+    )
+    vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(
+        np.uint8
+    )
+    Image.fromarray(vis_ref_img).save(save_ref_img_path)
+
+    # read motion seq
+    motion_name = os.path.dirname(
+        motion_seqs_dir[:-1] if motion_seqs_dir[-1] == "/" else motion_seqs_dir
+    )
+    motion_name = os.path.basename(motion_name)
+
+    motion_seq = prepare_motion_seqs(
+        motion_seqs_dir,
+        None,
+        save_root=dump_tmp_dir,
+        fps=30,
+        bg_color=1.0,
+        aspect_standard=aspect_standard,
+        enlarge_ratio=[1.0, 1, 0],
+        render_image_res=render_size,
+        multiply=16,
+        need_mask=motion_img_need_mask,
+        vis_motion=vis_motion,
+    )
+
+    camera_size = len(motion_seq["motion_seqs"])
+    shape_param = shape_pose.beta
+
+    device = "cuda"
+    dtype = torch.float32
+    shape_param = torch.tensor(shape_param, dtype=dtype).unsqueeze(0)
+
+    lhm.to(dtype)
+
+    smplx_params = motion_seq['smplx_params']
+    smplx_params['betas'] = shape_param.to(device)
+
+    gs_model_list, query_points, transform_mat_neutral_pose = lhm.infer_single_view(
+        image.unsqueeze(0).to(device, dtype),
+        src_head_rgb.unsqueeze(0).to(device, dtype),
+        None,
+        None,
+        render_c2ws=motion_seq["render_c2ws"].to(device),
+        render_intrs=motion_seq["render_intrs"].to(device),
+        render_bg_colors=motion_seq["render_bg_colors"].to(device),
+        smplx_params={
+            k: v.to(device) for k, v in smplx_params.items()
+        },
+    )
+
+
+    # rendering !!!!
+
+    start_time = time.time()
+    batch_dict = dict()
+    batch_size = 40 # avoid memeory out!
+
+    for batch_i in range(0, camera_size, batch_size):
+        with torch.no_grad():
+            # TODO check device and dtype
+            # dict_keys(['comp_rgb', 'comp_rgb_bg', 'comp_mask', 'comp_depth', '3dgs'])
+            keys = [
+                "root_pose",
+                "body_pose",
+                "jaw_pose",
+                "leye_pose",
+                "reye_pose",
+                "lhand_pose",
+                "rhand_pose",
+                "trans",
+                "focal",
+                "princpt",
+                "img_size_wh",
+                "expr",
+            ]
+            batch_smplx_params = dict()
+            batch_smplx_params["betas"] = shape_param.to(device)
+            batch_smplx_params['transform_mat_neutral_pose'] = transform_mat_neutral_pose
+            for key in keys:
+                batch_smplx_params[key] = motion_seq["smplx_params"][key][
+                    :, batch_i : batch_i + batch_size
+                ].to(device)
+
+            res = self.model.animation_infer(gs_model_list, query_points, batch_smplx_params,
+                render_c2ws=motion_seq["render_c2ws"][
+                    :, batch_i : batch_i + batch_size
+                ].to(device),
+                render_intrs=motion_seq["render_intrs"][
+                    :, batch_i : batch_i + batch_size
+                ].to(device),
+                render_bg_colors=motion_seq["render_bg_colors"][
+                    :, batch_i : batch_i + batch_size
+                ].to(device),
+            )
+
+        for accumulate_key in ["comp_rgb", "comp_mask"]:
+            if accumulate_key not in batch_dict:
+                batch_dict[accumulate_key] = []
+            batch_dict[accumulate_key].append(res[accumulate_key].detach().cpu())
+        del res
+        torch.cuda.empty_cache()
+
+    for accumulate_key in ["comp_rgb", "comp_mask"]:
+        batch_dict[accumulate_key] = torch.cat(batch_dict[accumulate_key], dim=0)
+
+    print(f"time elapsed: {time.time() - start_time}")
+    rgb = batch_dict["comp_rgb"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
+    mask = batch_dict["comp_mask"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
+    mask[mask < 0.5] = 0.0
+
+    rgb = rgb * mask + (1 - mask) * 1
+    rgb = np.clip(rgb * 255, 0, 255).astype(np.uint8)
+
+    if vis_motion:
+        # print(rgb.shape, motion_seq["vis_motion_render"].shape)
+
+        vis_ref_img = np.tile(
+            cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]))[
+                None, :, :, :
+            ],
+            (rgb.shape[0], 1, 1, 1),
+        )
+        rgb = np.concatenate(
+            [rgb, motion_seq["vis_motion_render"], vis_ref_img], axis=2
+        )
+
+    os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
+
+    images_to_video(
+        rgb,
+        output_path=dump_video_path,
+        fps=render_fps,
+        gradio_codec=False,
+        verbose=True,
+    )
+
 
     # self.infer_single(
     #     image_path,
@@ -221,6 +534,7 @@ def demo_lhm(pose_estimator, face_detector, lhm_model, cfg):
     #     gradio_video_save_path=dump_video_path
     #     ))
 
+    return dump_image_path, dump_video_path
     # if status:
    #     return dump_image_path, dump_video_path
     # else:
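
The second hunk drops the old `parsing(image_raw)` mask and instead takes the alpha channel of the `remove()` output as the person mask, then feeds it to the `infer_preprocess_image` helper added in the first hunk. Below is a minimal standalone sketch of that preprocessing path, assuming `remove` is `rembg.remove`, that `infer_preprocess_image` from this app.py is in scope, and using hypothetical values for the input path, `aspect_standard`, and `source_size`:

# Sketch only: mirrors the call site added in demo_lhm, not part of the commit.
# Assumes `remove` is rembg.remove and infer_preprocess_image is the helper above.
import cv2
from rembg import remove

image_raw = "./exps/sample_input.png"      # hypothetical input path
input_np = cv2.imread(image_raw)           # HxWx3 uint8 (BGR)
output_np = remove(input_np)               # HxWx4; channel 3 is the matting alpha
parsing_mask = output_np[:, :, 3]          # foreground mask of the person

image, mask, intr = infer_preprocess_image(
    image_raw,
    mask=parsing_mask,
    intr=None,
    pad_ratio=0,
    bg_color=1.0,
    max_tgt_size=896,
    aspect_standard=5.0 / 3,   # assumed height/width ratio; the app reads this from cfg
    enlarge_ratio=[1.0, 1.0],
    render_tgt_size=1024,      # assumed source_size; the app reads this from cfg
    multiply=14,
    need_mask=True,
)
# image: [1, 3, H, W] float tensor in [0, 1]; mask: [1, 1, H, W]; intr stays None here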
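The rendering section added in the same hunk guards against GPU out-of-memory by processing the motion sequence in chunks of `batch_size = 40` frames under `torch.no_grad()`, moving each chunk's `comp_rgb` and `comp_mask` outputs to the CPU, and calling `torch.cuda.empty_cache()` before the next chunk. A generic sketch of that chunked-inference pattern, with a hypothetical `render_fn` standing in for the LHM animation call:

import torch

def render_in_chunks(render_fn, num_frames, batch_size=40):
    """Accumulate per-chunk render outputs on the CPU to keep GPU memory bounded."""
    batch_dict = {"comp_rgb": [], "comp_mask": []}
    for start in range(0, num_frames, batch_size):
        end = min(start + batch_size, num_frames)
        with torch.no_grad():                 # inference only, no autograd graph
            res = render_fn(start, end)       # hypothetical per-chunk renderer
        for key in batch_dict:
            batch_dict[key].append(res[key].detach().cpu())  # move results off the GPU
        del res
        torch.cuda.empty_cache()              # release the chunk's GPU allocations
    return {k: torch.cat(v, dim=0) for k, v in batch_dict.items()}

# Toy usage with a dummy renderer returning the same keys the commit accumulates.
dummy = lambda s, e: {"comp_rgb": torch.rand(e - s, 64, 64, 3),
                      "comp_mask": torch.rand(e - s, 64, 64, 1)}
out = render_in_chunks(dummy, num_frames=100)
print(out["comp_rgb"].shape)  # torch.Size([100, 64, 64, 3])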