Spaces:
Running
Running
File size: 9,793 Bytes
344feb9 10ae2d1 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 b878264 a612409 767c99b b11843e e999ea2 344feb9 767c99b 344feb9 767c99b 6278ae1 767c99b 3241598 767c99b a4870c8 767c99b a4870c8 767c99b 344feb9 767c99b e185ea0 767c99b 41a5410 344feb9 2106d07 41a5410 2106d07 70e91c6 41a5410 70e91c6 41a5410 344feb9 4a8d08c 41a5410 344feb9 a612409 344feb9 3ed85f1 344feb9 a612409 2a41939 344feb9 3ed85f1 e999ea2 3ed85f1 344feb9 4a8d08c 344feb9 e999ea2 344feb9 4a8d08c 344feb9 a612409 3ed85f1 2a41939 344feb9 e999ea2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
import os
import gc
import cv2
import gradio as gr
import numpy as np
import matplotlib.cm as cm
import matplotlib # New import for the updated colormap API
import subprocess
import sys
from utils.dc_utils import read_video_frames, save_video
# Markdown heading rendered at the top of the demo page.
title = "**RGBD SBS output**"

# Markdown blurb rendered under the heading. The single embedded newline
# separates the summary line from the line of reference links.
description = (
    "**Video Depth Anything** + RGBD sbs output for viewing with Looking Glass Factory displays.\n"
    "Please refer to our [paper](https://arxiv.org/abs/2501.12375), "
    "[project page](https://videodepthanything.github.io/), "
    "and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."
)
def _depth_frame_to_vis(depth_frame, d_min, d_span, cmap, grayscale, convert_from_color):
    """Turn one depth frame into a 3-channel image ready for stitching.

    Args:
        depth_frame: One frame of the depth video as a NumPy array.
        d_min: Smallest depth value over the whole clip, as a float.
        d_span: ``max - min`` over the whole clip as a non-zero float.
        cmap: Matplotlib colormap used for the colored rendering.
        grayscale: Produce a gray image instead of a colormapped one.
        convert_from_color: In grayscale mode, derive the gray values from a
            color rendering rather than using the depth values directly.

    Returns:
        An ``H x W x 3`` array holding the depth visualization.
    """
    if depth_frame.ndim == 3 and depth_frame.shape[2] == 3:
        # The frame is already a 3-channel image (the usual case when the
        # depth input is itself a rendered video).
        if grayscale:
            if not convert_from_color:
                # Assume the frame is already in the desired format.
                return depth_frame
            depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
            return np.stack([depth_gray] * 3, axis=-1)
        if depth_frame.max() > 0:
            # Collapse to one channel, scale to [0, 1], then colormap.
            depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_RGB2GRAY)
            return (cmap(depth_gray / 255.0)[..., :3] * 255).astype(np.uint8)
        # An all-zero frame carries no depth information; pass it through.
        return depth_frame

    # Single-channel depth: normalize to [0, 255] using the clip-wide range.
    # The arithmetic is done in float64 so it cannot wrap around in a narrow
    # integer dtype.
    depth_norm = np.clip(
        (depth_frame.astype(np.float64) - d_min) / d_span * 255, 0, 255
    ).astype(np.uint8)
    if depth_norm.ndim == 3:
        # Drop singleton dimensions so the result is a plain 2-D image.
        depth_norm = np.squeeze(depth_norm)

    if grayscale and not convert_from_color:
        # Use the normalized depth values directly as gray levels.
        return np.stack([depth_norm] * 3, axis=-1)

    depth_color = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
    if not grayscale:
        return depth_color
    # Grayscale derived from the colormapped rendering.
    depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
    return np.stack([depth_gray] * 3, axis=-1)


def stitch_rgbd_videos(
    processed_video: str,
    depth_vis_video: str,
    max_len: int = -1,
    target_fps: int = -1,
    max_res: int = 1280,
    stitch: bool = True,
    grayscale: bool = True,
    convert_from_color: bool = True,
    blur: float = 0.3,
    output_dir: str = './outputs',
    input_size: int = 518,
):
    """Stitch an RGB video and its depth video side by side into one MP4.

    Each output frame is the RGB frame on the left and the depth
    visualization, resized to the same height and width, on the right.
    After the video is written, ffmpeg is used to copy the first audio track
    of ``processed_video`` (if it has one) into the result. If ffmpeg is
    unavailable or fails, the stitched video is kept without audio instead of
    raising.

    Args:
        processed_video: Path of the RGB input video (also the audio source).
        depth_vis_video: Path of the depth video.
        max_len: Forwarded to ``read_video_frames`` as the frame limit.
        target_fps: Forwarded to ``read_video_frames``; the value it returns
            for the RGB video is used for the depth video and the output.
        max_res: Unused here; kept so the signature matches the UI inputs.
        stitch: When False, nothing is processed and None is returned.
        grayscale: Render depth as gray instead of the inferno colormap.
        convert_from_color: In grayscale mode, derive gray from a color image.
        blur: Gaussian blur strength for the depth image; 0 disables it.
        output_dir: Directory for the output file (created if missing).
        input_size: Unused here; kept for interface compatibility.

    Returns:
        Path of the stitched ``*_RGBD.mp4`` file, or None when ``stitch`` is
        False.

    Raises:
        ValueError: If either video yields no frames.
    """
    video_name = os.path.basename(processed_video)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    if not stitch:
        return None

    # Both videos are read with max_res=-1 (presumably "no downscaling" --
    # confirm against utils.dc_utils.read_video_frames).
    full_frames, target_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
    depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
    if len(full_frames) == 0 or len(depths) == 0:
        raise ValueError("No frames could be read from the input video or the depth video.")
    print(f"Depth frame shape: {depths[0].shape}, dtype: {depths[0].dtype}, min: {depths[0].min()}, max: {depths[0].max()}")

    # Clip-wide depth range, used to normalize single-channel depth frames.
    d_min, d_max = np.min(depths), np.max(depths)
    print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
    d_floor = float(d_min)
    d_span = float(d_max) - d_floor
    if d_span == 0:
        # Constant depth: avoid dividing by zero during normalization.
        d_span = 1.0

    # Loop invariants: colormap lookup and blur kernel size.
    cmap = matplotlib.colormaps.get_cmap("inferno")
    kernel_size = 0
    if blur > 0:
        # int(blur * 20) * 2 + 1 is always odd, as GaussianBlur requires;
        # the size is capped at 31.
        kernel_size = min(max(1, int(blur * 20) * 2 + 1), 31)

    stitched_frames = []
    # zip() stops at the shorter of the two sequences.
    for rgb_full, depth_frame in zip(full_frames, depths):
        depth_vis = _depth_frame_to_vis(
            depth_frame, d_floor, d_span, cmap, grayscale, convert_from_color
        )
        if kernel_size:
            depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)

        # Resize the depth visualization to match the RGB frame.
        H_full, W_full = rgb_full.shape[:2]
        depth_vis_resized = cv2.resize(depth_vis, (W_full, H_full)).astype(np.uint8)
        if depth_vis_resized.ndim == 2:
            depth_vis_resized = cv2.cvtColor(depth_vis_resized, cv2.COLOR_GRAY2BGR)
        # hconcat needs both halves to share a dtype.
        if rgb_full.dtype != depth_vis_resized.dtype:
            depth_vis_resized = depth_vis_resized.astype(rgb_full.dtype)
        stitched_frames.append(cv2.hconcat([rgb_full, depth_vis_resized]))

    stitched_frames = np.array(stitched_frames)

    # Use only the first 20 characters of the base name for the output file.
    base_name = os.path.splitext(video_name)[0]
    short_name = base_name[:20]
    stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
    save_video(stitched_frames, stitched_video_path, fps=target_fps)

    # The frame stack can be large; release it once, after it has been
    # written, instead of forcing a collection on every frame.
    del stitched_frames
    gc.collect()

    # Merge audio from the input video into the stitched video using ffmpeg.
    temp_audio_path = os.path.join(output_dir, short_name + '_RGBD_audio.mp4')
    cmd = [
        "ffmpeg",
        "-y",
        "-i", stitched_video_path,
        "-i", processed_video,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0?",  # trailing '?' makes the audio stream optional
        "-shortest",
        temp_audio_path,
    ]
    try:
        merge = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False)
    except OSError as err:
        # Typically: the ffmpeg executable is not installed / not on PATH.
        print(f"Audio merge skipped, could not run ffmpeg: {err}")
    else:
        if merge.returncode == 0 and os.path.exists(temp_audio_path):
            os.replace(temp_audio_path, stitched_video_path)
        else:
            stderr_tail = merge.stderr.decode("utf-8", errors="replace")[-500:]
            print(f"Audio merge failed (ffmpeg exit code {merge.returncode}); keeping video without audio. {stderr_tail}")
            # Do not leave a partial temp file behind.
            if os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)

    return stitched_video_path
def construct_demo():
    """Assemble the Gradio Blocks interface for the RGBD stitcher.

    The page has a Markdown header, a row holding the two input videos and
    the stitched output video, and a row with the advanced settings and the
    "Generate" button. Clicking the button calls ``stitch_rgbd_videos`` with
    the current component values.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        # Header: title, description, and a pointer to the upstream repo.
        header_sections = (
            title,
            description,
            "### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!",
        )
        for section in header_sections:
            gr.Markdown(section)

        # Top row: the two uploads on the left, the result on the right.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                rgb_input = gr.Video(label="Input Video with Audio")
            with gr.Column(scale=1):
                depth_input = gr.Video(label="Depth Video")
            with gr.Column(scale=2):
                with gr.Row(equal_height=True):
                    result_video = gr.Video(
                        label="Stitched RGBD Video",
                        interactive=False,
                        autoplay=True,
                        loop=True,
                        show_share_button=True,
                        scale=5,
                    )

        # Bottom row: settings and the trigger button.
        with gr.Row(equal_height=True):
            with gr.Column(scale=2):
                with gr.Accordion("Advanced Settings", open=False):
                    len_slider = gr.Slider(label="Max process length", minimum=-1, maximum=1000, value=-1, step=1)
                    fps_slider = gr.Slider(label="Target FPS", minimum=-1, maximum=30, value=-1, step=1)
                    res_slider = gr.Slider(label="Max side resolution", minimum=480, maximum=1920, value=1920, step=1)
                    stitch_box = gr.Checkbox(label="Stitch RGB & Depth Videos", value=True)
                    gray_box = gr.Checkbox(label="Output Depth as Grayscale", value=True)
                    from_color_box = gr.Checkbox(label="Convert Grayscale from Color", value=True)
                    blur_control = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur (can reduce edge artifacts on display)", value=0.3)
                run_button = gr.Button("Generate")
            with gr.Column(scale=1):
                # Empty spacer column.
                pass

        # The order of this list must match the positional parameters of
        # stitch_rgbd_videos.
        callback_inputs = [
            rgb_input,
            depth_input,
            len_slider,
            fps_slider,
            res_slider,
            stitch_box,
            gray_box,
            from_color_box,
            blur_control,
        ]
        run_button.click(
            fn=stitch_rgbd_videos,
            inputs=callback_inputs,
            outputs=result_video,
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, enable the request queue with
    # max_size=4, and start the Gradio server.
    demo = construct_demo()
    demo.queue(max_size=4).launch()