Commit 2ee3801
Parent(s): fb96f4f
add: progress bar

Files changed:
- app.py +13 -4
- common/inference_3d.py +4 -3
- common/visualization.py +3 -1
- joints_detectors/Alphapose/gene_npz.py +4 -4
- videopose_PSTMO.py +8 -8
app.py
CHANGED

@@ -1,12 +1,21 @@
 import gradio as gr
 from videopose_PSTMO import gr_video2mc
+import os
 
 
-def Video2MC(video):
-    output_path = gr_video2mc(video)
+def Video2MC(video, progress = gr.Progress(track_tqdm=True)):
+
+    progress(1.0, desc="Step 0: Starting")
+    output_path = gr_video2mc(video, progress)
+
     return output_path, output_path
 
+
 iface = gr.Interface(fn=Video2MC,
                      inputs=gr.Video(),
-                     outputs=["file", "text"]
-
+                     outputs=["file", "text"],
+                     examples=[os.path.join(os.path.dirname(__file__),
+                                            "input_videos/kun_test_5sec.mp4")],
+                     )
+
+iface.queue(concurrency_count=10).launch()
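Note on the pattern used above (a minimal sketch, not part of the commit, assuming Gradio 3.x; the names process and demo are placeholders): declaring a gr.Progress(track_tqdm=True) default parameter makes Gradio inject a progress tracker into the handler. Calling progress(fraction, desc=...) sets the bar directly, progress.tqdm(...) wraps an iterable, and track_tqdm=True additionally mirrors plain tqdm bars created by code the handler calls.

    import time
    import gradio as gr

    def process(video, progress=gr.Progress(track_tqdm=True)):
        # Manual update: report an absolute fraction plus a description.
        progress(0.0, desc="Starting")
        # progress.tqdm() wraps an iterable and advances the UI bar per item;
        # track_tqdm=True also mirrors plain tqdm bars raised in called code.
        for _ in progress.tqdm(range(10), desc="Working"):
            time.sleep(0.1)
        return video

    demo = gr.Interface(fn=process, inputs=gr.Video(), outputs=gr.Video())
    demo.queue().launch()

Progress updates are streamed through Gradio's queue, which fits the iface.queue(...) call added at the bottom of app.py.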
common/inference_3d.py
CHANGED

@@ -48,7 +48,7 @@ def input_augmentation(input_2D, input_2D_flip, model_trans, joints_left, joints
 
     return input_2D, output_3D, output_3D_VTE
 
-def step(opt, dataLoader, model, optimizer=None, epoch=None):
+def step(opt, dataLoader, model, progress, optimizer=None, epoch=None):
     model_trans = model['trans']
 
     model_trans.eval()
@@ -57,6 +57,7 @@ def step(opt, dataLoader, model, optimizer=None, epoch=None):
     joints_right = [1, 2, 3, 14, 15, 16]
     epoch_cnt=0
     out = []
+    # probar = progress.tqdm(total=1, desc="Step 2: Prediction (Please wait)")
     for _, batch, batch_2d, batch_2d_flip in dataLoader.next_epoch():
         #[gt_3D, input_2D] = get_varialbe([batch, batch_2d])
         #input_2D = Variable(batch_2d).contiguous().type(torch.cuda.FloatTensor)
@@ -102,6 +103,6 @@ def step(opt, dataLoader, model, optimizer=None, epoch=None):
         epoch_cnt +=1
     return out.numpy()
 
-def val(opt, val_loader, model):
+def val(opt, val_loader, model, progress):
     with torch.no_grad():
-        return step(opt, val_loader, model)
+        return step(opt, val_loader, model, progress)
common/visualization.py
CHANGED

@@ -50,7 +50,7 @@ def downsample_tensor(X, factor):
     return np.mean(X[:length].reshape(-1, factor, *X.shape[1:]), axis=1)
 
 
-def render_animation(keypoints, poses, skeleton, fps, bitrate, azim, output, viewport,
+def render_animation(keypoints, poses, skeleton, fps, bitrate, azim, output, progress, viewport,
                      limit=-1, downsample=1, size=6, input_video_path=None, input_video_skip=0):
     """
     TODO
@@ -126,6 +126,7 @@ def render_animation(keypoints, poses, skeleton, fps, bitrate, azim, output, vie
 
     parents = skeleton.parents()
    pbar = tqdm(total=limit)
+    # probar = progress.tqdm(total=limit, desc="Step 3: 3D Rendering")
 
     def update_video(i):
         nonlocal initialized, image, lines, points
@@ -177,6 +178,7 @@ def render_animation(keypoints, poses, skeleton, fps, bitrate, azim, output, vie
             points.set_offsets(keypoints[i])
 
         pbar.update()
+        # probar.update()
 
     fig.tight_layout()
 
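Side note (a reading aid, not part of the diff): because app.py constructs the tracker with track_tqdm=True, the existing tqdm bar (pbar) in render_animation is already mirrored to the Gradio UI without explicit progress calls, which is consistent with the probar lines staying commented out. A minimal sketch of that manual-total tqdm pattern, with hypothetical names:

    from tqdm import tqdm

    def render_frames(n_frames):
        # A manually driven bar: the total is declared up front and update()
        # advances it by one step per rendered frame. When called under a
        # Gradio handler created with gr.Progress(track_tqdm=True), this same
        # bar is also reflected in the web UI.
        pbar = tqdm(total=n_frames, desc="Step 3: 3D Rendering")
        for i in range(n_frames):
            pass  # draw frame i here
            pbar.update()
        pbar.close()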
joints_detectors/Alphapose/gene_npz.py
CHANGED

@@ -32,8 +32,8 @@ def image_interface(model, image):
     pass
 
 
-def generate_kpts(video_file):
-    final_result, video_name = handle_video(video_file)
+def generate_kpts(video_file, progress):
+    final_result, video_name = handle_video(video_file, progress)
 
     # ============ Changing ++++++++++
 
@@ -67,7 +67,7 @@ def generate_kpts(video_file):
     return kpts
 
 
-def handle_video(video_file):
+def handle_video(video_file, progress):
     # =========== common ===============
     args.video = video_file
     base_name = os.path.basename(args.video)
@@ -136,7 +136,7 @@ def handle_video(video_file):
     print('Start pose estimation...')
     im_names_desc = tqdm(range(data_loader.length()))
     batchSize = args.posebatch
-    for i in im_names_desc:
+    for i in progress.tqdm(range(data_loader.length()), desc="Step 1: 2D Detecting"):
 
        start_time = getTime()
        with torch.no_grad():
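One design note, offered as a hedged sketch rather than as what the commit does: handle_video and generate_kpts now require a progress argument, so calling them outside the Gradio app means supplying a tracker. A small fallback helper (hypothetical name iter_with_progress) would keep such functions usable from the command line as well:

    from tqdm import tqdm

    def iter_with_progress(iterable, desc, progress=None):
        # Use the Gradio tracker when one is supplied; otherwise fall back to a
        # plain terminal tqdm bar so the caller still works outside the web app.
        if progress is not None:
            return progress.tqdm(iterable, desc=desc)
        return tqdm(iterable, desc=desc)

    # e.g. for i in iter_with_progress(range(n_frames), "Step 1: 2D Detecting", progress): ...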
videopose_PSTMO.py
CHANGED

@@ -60,7 +60,7 @@ class Skeleton:
         return [1, 2, 3, 14, 15, 16]
 
 
-def main(args):
+def main(args, progress):
     detector_2d = get_detector_2d(args.detector_2d)
 
     assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'
@@ -69,7 +69,7 @@ def main(args):
     #args.input_npz = './outputs/alpha_pose_skiing_cut/skiing_cut.npz'
     if not args.input_npz:
         video_name = args.viz_video
-        keypoints = detector_2d(video_name)
+        keypoints = detector_2d(video_name, progress)
     else:
         npz = np.load(args.input_npz)
         keypoints = npz['kpts'] # (N, 17, 2)
@@ -133,7 +133,7 @@ def main(args):
                      pad=pad, causal_shift=causal_shift, augment=args.test_time_augmentation, shuffle=False,
                      kps_left=kps_left, kps_right=kps_right, joints_left=joints_left, joints_right=joints_right)
 
-    prediction = val(args, gen, model)
+    prediction = val(args, gen, model, progress)
 
     # save 3D joint points
     # np.save(f'outputs/test_3d_{args.video_name}_output.npy', prediction, allow_pickle=True)
@@ -160,7 +160,7 @@ def main(args):
 
     from common.visualization import render_animation
     render_animation(input_keypoints, anim_output,
-                     Skeleton(), 25, args.viz_bitrate, np.array(70., dtype=np.float32), args.viz_output,
+                     Skeleton(), 25, args.viz_bitrate, np.array(70., dtype=np.float32), args.viz_output, progress,
                      limit=args.viz_limit, downsample=args.viz_downsample, size=args.viz_size,
                      input_video_path=args.viz_video, viewport=(1000, 1002),
                      input_video_skip=args.viz_skip)
@@ -171,7 +171,7 @@ def main(args):
     return output_dir_dict
 
 
-def inference_video(video_path, detector_2d):
+def inference_video(video_path, detector_2d, progress):
     """
     Do image -> 2d points -> 3d points to video.
     :param detector_2d: used 2d joints detector. Can be {alpha_pose, hr_pose}
@@ -189,14 +189,14 @@ def inference_video(video_path, detector_2d):
     args.evaluate = 'pretrained_h36m_detectron_coco.bin'
 
     with Timer(video_path):
-        output_dir_dict = main(args)
+        output_dir_dict = main(args, progress)
 
     output_dir_dict["output_videos"] = args.viz_output
     output_dir_dict["video_name"] = args.video_name
     return output_dir_dict
 
 
-def gr_video2mc(video_path):
+def gr_video2mc(video_path, progress):
 
     if not os.path.exists('output_3Dpose_npy'):
         os.makedirs('output_3Dpose_npy')
@@ -208,7 +208,7 @@ def gr_video2mc(video_path):
         os.makedirs('output_videos')
 
     FPS_mine_imator = 30
-    output_dir_dict = inference_video(video_path, 'alpha_pose')
+    output_dir_dict = inference_video(video_path, 'alpha_pose', progress)
     Hk.hpe2keyframes(output_dir_dict['npy'], FPS_mine_imator, f"output_miframes/{output_dir_dict['video_name']}.miframes")
     return os.path.abspath(f"output_miframes/{output_dir_dict['video_name']}.miframes")
 
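Taken together, the commit threads a single gr.Progress tracker from the Gradio handler down to each pipeline stage. A comment-only summary of the resulting call chain, added here as a reading aid rather than as part of the commit:

    # Flow of the progress tracker after this commit (signatures only):
    #
    # Video2MC(video, progress)                        # app.py, gr.Progress(track_tqdm=True)
    #   -> gr_video2mc(video, progress)                # videopose_PSTMO.py
    #     -> inference_video(video_path, 'alpha_pose', progress)
    #       -> main(args, progress)
    #         -> detector_2d(video_name, progress)     # Step 1: 2D detection (gene_npz.py)
    #         -> val(args, gen, model, progress)       # Step 2: 3D prediction (inference_3d.py)
    #         -> render_animation(..., progress, ...)  # Step 3: rendering (visualization.py)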