import os
import gc
import subprocess

import cv2
import gradio as gr
import matplotlib  # Provides matplotlib.colormaps, the current colormap registry API.
import numpy as np
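# This Space stitches an RGB video and its depth-visualization video into a
# single side-by-side (SBS) RGBD video for Looking Glass Factory displays,
# carrying the source audio over with ffmpeg.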
title = """**RGBD SBS output**"""
description = """**Video Depth Anything** + RGBD SBS output for viewing with Looking Glass Factory displays.
Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
def stitch_rgbd_videos(
    processed_video: str,
    depth_vis_video: str,
    max_len: int = -1,
    target_fps: int = -1,
    max_res: int = 1280,
    stitch: bool = True,
    grayscale: bool = True,
    convert_from_color: bool = True,
    blur: float = 0.3,
    output_dir: str = './outputs',
):
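    """Stitch an RGB video and its depth visualization side by side into a
    single RGBD video, preserving the source audio via ffmpeg.

    Returns the path to the stitched video, or None if stitching is disabled
    or a video cannot be opened. Requires the ffmpeg binary on PATH.
    """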
    video_name = os.path.basename(processed_video)
    os.makedirs(output_dir, exist_ok=True)

    stitched_video_path = None
    if stitch:
        # Process the two videos frame by frame.
        cap_rgb = cv2.VideoCapture(processed_video)
        cap_depth = cv2.VideoCapture(depth_vis_video)
        if not cap_rgb.isOpened() or not cap_depth.isOpened():
            print("Error: Could not open one or both videos")
            return None
        # Get video properties.
        original_fps = cap_rgb.get(cv2.CAP_PROP_FPS)
        if target_fps <= 0:
            target_fps = original_fps
        # Calculate the stride for frame skipping (e.g. a 30 fps source with a
        # 15 fps target gives stride 2: keep every second frame).
        stride = max(round(original_fps / target_fps), 1) if target_fps > 0 else 1
        # Get the frame count for progress reporting.
        total_frames_rgb = int(cap_rgb.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f"Video fps: {original_fps}, target fps: {target_fps}, total frames: {total_frames_rgb}")
        # Build the output path from a truncated input name.
        base_name = os.path.splitext(video_name)[0]
        short_name = base_name[:20]
        stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
        # Read the first frames to determine dimensions.
        ret_rgb, first_frame_rgb = cap_rgb.read()
        ret_depth, first_frame_depth = cap_depth.read()
        if not ret_rgb or not ret_depth:
            print("Error: Could not read first frame from one or both videos")
            return None
        # Rewind both captures so the main loop starts from frame 0.
        cap_rgb.set(cv2.CAP_PROP_POS_FRAMES, 0)
        cap_depth.set(cv2.CAP_PROP_POS_FRAMES, 0)
        # Determine output dimensions, applying the same max_res downscaling
        # used per frame below: cv2.VideoWriter silently drops frames whose
        # size does not match the size it was opened with.
        H_full, W_full = first_frame_rgb.shape[:2]
        if max_res > 0 and max(H_full, W_full) > max_res:
            scale = max_res / max(H_full, W_full)
            H_full, W_full = int(H_full * scale), int(W_full * scale)
        output_width = W_full * 2  # RGB and depth side by side.
        output_height = H_full
        # Initialize the video writer.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(stitched_video_path, fourcc, target_fps, (output_width, output_height))
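        # Note: 'mp4v' is commonly available with OpenCV builds; the ffmpeg
        # step at the end stream-copies this video track unchanged.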
        # Process frames one by one.
        frame_count = 0
        processed_count = 0
        while True:
            ret_rgb, rgb_full = cap_rgb.read()
            ret_depth, depth_frame = cap_depth.read()
            # Stop when either video ends.
            if not ret_rgb or not ret_depth:
                break
            # Skip frames according to the stride.
            frame_count += 1
            if frame_count % stride != 0:
                continue
            processed_count += 1
            # Honor the max_len limit if specified.
            if max_len > 0 and processed_count > max_len:
                break
            # Downscale the RGB frame if it exceeds max_res.
            if max_res > 0:
                h, w = rgb_full.shape[:2]
                if max(h, w) > max_res:
                    scale = max_res / max(h, w)
                    new_h, new_w = int(h * scale), int(w * scale)
                    rgb_full = cv2.resize(rgb_full, (new_w, new_h))
            # Process the depth frame (assumed 3-channel; OpenCV decodes BGR).
            if grayscale:
                if convert_from_color:
                    # Collapse a color-mapped depth frame to grayscale.
                    depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2GRAY)
                    depth_vis = np.stack([depth_gray] * 3, axis=-1)
                else:
                    # Assume it is already effectively grayscale.
                    depth_vis = depth_frame
            else:
                if np.max(depth_frame) > 0:  # Ensure we have valid depth data.
                    # Re-colorize with the inferno colormap.
                    cmap = matplotlib.colormaps.get_cmap("inferno")
                    # Collapse to a single channel first.
                    depth_gray = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2GRAY)
                    # Normalize to the 0-1 range the colormap expects.
                    depth_norm = depth_gray / 255.0
                    # Apply the colormap: it returns RGBA in RGB order, so drop
                    # alpha and flip to BGR for the OpenCV writer.
                    depth_vis = (cmap(depth_norm)[..., :3] * 255).astype(np.uint8)
                    depth_vis = cv2.cvtColor(depth_vis, cv2.COLOR_RGB2BGR)
                else:
                    # All-zero depth: fall back to the original frame.
                    depth_vis = depth_frame
            # Apply a Gaussian blur to the depth map if requested.
            if blur > 0:
                kernel_size = max(1, int(blur * 20) * 2 + 1)  # Always odd, as GaussianBlur requires.
                kernel_size = min(kernel_size, 31)  # Cap the kernel to keep the blur cost bounded.
                depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
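            # For example, blur=0.3 maps to a 13x13 kernel (int(0.3 * 20) * 2 + 1 = 13),
            # while blur=1.0 would give 41, which the cap reduces to 31.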
            # Resize the depth visualization to match the RGB frame.
            H_full, W_full = rgb_full.shape[:2]
            depth_vis_resized = cv2.resize(depth_vis, (W_full, H_full))
            depth_vis_resized = depth_vis_resized.astype(np.uint8)  # Ensure uint8 for the writer.
            # Concatenate the frames side by side and write the result.
            stitched = cv2.hconcat([rgb_full, depth_vis_resized])
            out.write(stitched)
            # Free memory.
            del rgb_full, depth_vis, depth_vis_resized, stitched
            # Progress report.
            if processed_count % 10 == 0:
                print(f"Processed {processed_count} frames...")
            # Force garbage collection periodically.
            if processed_count % 50 == 0:
                gc.collect()
        # Release resources.
        cap_rgb.release()
        cap_depth.release()
        out.release()
        # Merge the audio track of the input video into the stitched video
        # using ffmpeg: stream-copy the video, re-encode audio to AAC.
        temp_audio_path = stitched_video_path.replace('_RGBD.mp4', '_RGBD_audio.mp4')
        cmd = [
            "ffmpeg",
            "-y",
            "-i", stitched_video_path,
            "-i", processed_video,
            "-c:v", "copy",
            "-c:a", "aac",
            "-map", "0:v:0",
            "-map", "1:a:0?",
            "-shortest",
            temp_audio_path,
        ]
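        # "-map 0:v:0" selects the stitched video stream, "-map 1:a:0?" selects
        # the input's first audio stream (the trailing '?' makes it optional, so
        # silent inputs do not fail), and "-shortest" ends at the shorter stream.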
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Swap in the muxed file only if ffmpeg succeeded; otherwise keep the
        # silent stitched video rather than crashing on a missing temp file.
        if result.returncode == 0 and os.path.exists(temp_audio_path):
            os.replace(temp_audio_path, stitched_video_path)
print(f"Completed processing {processed_count} frames")
# Return stitched video.
return stitched_video_path
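# Example usage outside Gradio (paths are hypothetical):
#   stitch_rgbd_videos("clip.mp4", "clip_depth.mp4", target_fps=24)
# writes ./outputs/clip_RGBD.mp4 with the depth map stitched to the right of
# the RGB frames and the original audio track carried over.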
def construct_demo():
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                # Video input component for file upload.
                processed_video = gr.Video(label="Input Video with Audio")
            with gr.Column(scale=1):
                depth_vis_video = gr.Video(label="Depth Video")
            with gr.Column(scale=2):
                with gr.Row(equal_height=True):
                    stitched_video = gr.Video(label="Stitched RGBD Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)

        with gr.Row(equal_height=True):
            with gr.Column(scale=2):
                with gr.Accordion("Advanced Settings", open=False):
                    max_len = gr.Slider(label="Max process length", minimum=-1, maximum=1000, value=-1, step=1)
                    target_fps = gr.Slider(label="Target FPS", minimum=-1, maximum=30, value=-1, step=1)
                    max_res = gr.Slider(label="Max side resolution", minimum=480, maximum=1920, value=1920, step=1)
                    stitch_option = gr.Checkbox(label="Stitch RGB & Depth Videos", value=True)
                    grayscale_option = gr.Checkbox(label="Output Depth as Grayscale", value=True)
                    convert_from_color_option = gr.Checkbox(label="Convert Grayscale from Color", value=True)
                    blur_slider = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur (can reduce edge artifacts on display)", value=0.3)
                generate_btn = gr.Button("Generate")
            with gr.Column(scale=1):
                pass

        generate_btn.click(
            fn=stitch_rgbd_videos,
            inputs=[processed_video, depth_vis_video, max_len, target_fps, max_res, stitch_option, grayscale_option, convert_from_color_option, blur_slider],
            outputs=stitched_video,
        )

    return demo
if __name__ == "__main__":
    demo = construct_demo()
    demo.queue(max_size=4).launch()