Krokodilpirat committed on
Commit
0b8aaf7
·
verified ·
1 Parent(s): 0184e46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -55
app.py CHANGED
@@ -5,39 +5,48 @@ import cv2
5
  import gradio as gr
6
  import numpy as np
7
  import matplotlib.cm as cm
8
- import matplotlib
9
  import subprocess
10
 
11
  from video_depth_anything.video_depth import VideoDepthAnything
12
  from utils.dc_utils import read_video_frames, save_video
13
  from huggingface_hub import hf_hub_download
14
 
 
 
 
15
  examples = [
16
- ['assets/example_videos/octopus_01.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
17
- ['assets/example_videos/chicken_01.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
18
- ['assets/example_videos/gorilla_01.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
19
- ['assets/example_videos/davis_rollercoaster.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
20
- ['assets/example_videos/Tokyo-Walk_rgb.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
21
- ['assets/example_videos/4158877-uhd_3840_2160_30fps_rgb.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
22
- ['assets/example_videos/4511004-uhd_3840_2160_24fps_rgb.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
23
- ['assets/example_videos/1753029-hd_1920_1080_30fps.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
24
- ['assets/example_videos/davis_burnout.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
25
- ['assets/example_videos/example_5473765-l.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
26
- ['assets/example_videos/Istanbul-26920.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
27
- ['assets/example_videos/obj_1.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
28
- ['assets/example_videos/sheep_cut1.mp4', -1, -1, 1280, True, True, True, 0.3, 1],
29
  ]
30
 
 
31
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
32
 
 
33
  model_configs = {
34
  'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
35
  'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
36
  }
37
- encoder2name = {'vits': 'Small', 'vitl': 'Large'}
 
 
 
38
  encoder = 'vitl'
39
  model_name = encoder2name[encoder]
40
 
 
41
  video_depth_anything = VideoDepthAnything(**model_configs[encoder])
42
  filepath = hf_hub_download(
43
  repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
@@ -51,27 +60,6 @@ title = "# Video Depth Anything + RGBD sbs output"
51
  description = """Official demo for **Video Depth Anything** + RGBD sbs output for viewing with Looking Glass Factory displays.
52
  Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
53
 
54
- def estimate_duration(input_video_path, repeat_factor):
55
- try:
56
- cmd = [
57
- "ffprobe", "-v", "error",
58
- "-select_streams", "v:0",
59
- "-show_entries", "format=duration",
60
- "-of", "default=noprint_wrappers=1:nokey=1",
61
- input_video_path
62
- ]
63
- result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
64
- duration = float(result.stdout.strip())
65
- final_duration = duration * repeat_factor
66
- return f"📏 Final RGBD video duration: ~{int(final_duration)} seconds"
67
- except Exception:
68
- return "⚠️ Unable to estimate duration."
69
-
70
- def update_duration_preview(video_file, repeat_factor):
71
- if video_file is None:
72
- return "Final video duration: N/A"
73
- return estimate_duration(video_file, repeat_factor)
74
-
75
  def infer_video_depth(
76
  input_video: str,
77
  max_len: int = -1,
@@ -81,27 +69,108 @@ def infer_video_depth(
81
  grayscale: bool = True,
82
  convert_from_color: bool = True,
83
  blur: float = 0.3,
84
- repeat_rgbd: int = 1,
85
  output_dir: str = './outputs',
86
  input_size: int = 518,
87
  ):
88
- ... # Der restliche Code bleibt gleich
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def construct_demo():
91
  with gr.Blocks(analytics_enabled=False) as demo:
92
  gr.Markdown(title)
93
  gr.Markdown(description)
94
  gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")
95
-
96
  with gr.Row(equal_height=True):
97
  with gr.Column(scale=1):
 
98
  input_video = gr.Video(label="Input Video")
99
  with gr.Column(scale=2):
100
  with gr.Row(equal_height=True):
101
- processed_video = gr.Video(label="Preprocessed Video", interactive=False, autoplay=True, loop=True, show_share_button=True)
102
- depth_vis_video = gr.Video(label="Generated Depth Video", interactive=False, autoplay=True, loop=True, show_share_button=True)
103
- stitched_video = gr.Video(label="Stitched RGBD Video", interactive=False, autoplay=True, loop=True, show_share_button=True)
104
-
105
  with gr.Row(equal_height=True):
106
  with gr.Column(scale=1):
107
  with gr.Accordion("Advanced Settings", open=False):
@@ -112,33 +181,28 @@ def construct_demo():
112
  grayscale_option = gr.Checkbox(label="Output Depth as Grayscale", value=True)
113
  convert_from_color_option = gr.Checkbox(label="Convert Grayscale from Color", value=True)
114
  blur_slider = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur (can reduce edge artifacts on display)", value=0.3)
115
- repeat_rgbd_slider = gr.Slider(label="Repeat RGBD Video (Times)", minimum=1, maximum=20, value=1, step=1)
116
- duration_preview = gr.Markdown("Final video duration: N/A", visible=True)
117
  generate_btn = gr.Button("Generate")
118
  with gr.Column(scale=2):
119
  pass
120
-
121
  gr.Examples(
122
  examples=examples,
123
- inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, convert_from_color_option, blur_slider, repeat_rgbd_slider],
124
  outputs=[processed_video, depth_vis_video, stitched_video],
125
  fn=infer_video_depth,
126
  cache_examples=False,
127
  cache_mode="lazy",
128
  )
129
-
130
- input_video.change(fn=update_duration_preview, inputs=[input_video, repeat_rgbd_slider], outputs=[duration_preview])
131
- repeat_rgbd_slider.change(fn=update_duration_preview, inputs=[input_video, repeat_rgbd_slider], outputs=[duration_preview])
132
-
133
  generate_btn.click(
134
  fn=infer_video_depth,
135
- inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, convert_from_color_option, blur_slider, repeat_rgbd_slider],
136
  outputs=[processed_video, depth_vis_video, stitched_video],
137
  )
138
-
139
  return demo
140
 
141
  if __name__ == "__main__":
142
  demo = construct_demo()
143
- demo.queue()
144
- demo.launch(share=True, ssr=False)
 
5
  import gradio as gr
6
  import numpy as np
7
  import matplotlib.cm as cm
8
+ import matplotlib # New import for the updated colormap API
9
  import subprocess
10
 
11
  from video_depth_anything.video_depth import VideoDepthAnything
12
  from utils.dc_utils import read_video_frames, save_video
13
  from huggingface_hub import hf_hub_download
14
 
# Example rows for the Gradio demo. Every row carries 8 values, in the order
# [video_path, max_len, target_fps, max_res, stitch, grayscale, convert_from_color, blur].
# All clips share the same seven settings, so only the file names are listed.
_EXAMPLE_SETTINGS = (-1, -1, 1280, True, True, True, 0.3)
_EXAMPLE_CLIPS = (
    'octopus_01.mp4',
    'chicken_01.mp4',
    'gorilla_01.mp4',
    'davis_rollercoaster.mp4',
    'Tokyo-Walk_rgb.mp4',
    '4158877-uhd_3840_2160_30fps_rgb.mp4',
    '4511004-uhd_3840_2160_24fps_rgb.mp4',
    '1753029-hd_1920_1080_30fps.mp4',
    'davis_burnout.mp4',
    'example_5473765-l.mp4',
    'Istanbul-26920.mp4',
    'obj_1.mp4',
    'sheep_cut1.mp4',
)
examples = [
    ['assets/example_videos/' + clip, *_EXAMPLE_SETTINGS]
    for clip in _EXAMPLE_CLIPS
]
33
 
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Constructor keyword arguments for each supported encoder variant.
model_configs = {
    'vits': dict(encoder='vits', features=64, out_channels=[48, 96, 192, 384]),
    'vitl': dict(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024]),
}
# Encoder key -> name suffix used to build the checkpoint repository id.
encoder2name = dict(vits='Small', vitl='Large')
encoder = 'vitl'
model_name = encoder2name[encoder]
48
 
49
+ # Initialize the model.
50
  video_depth_anything = VideoDepthAnything(**model_configs[encoder])
51
  filepath = hf_hub_download(
52
  repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
 
60
  description = """Official demo for **Video Depth Anything** + RGBD sbs output for viewing with Looking Glass Factory displays.
61
  Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def _depth_frame_to_vis(depth_frame, d_min, d_range, cmap, grayscale, convert_from_color, blur):
    """Render one depth map as a 3-channel uint8 image for the RGBD stitch.

    Args:
        depth_frame: Depth values of a single frame (NumPy array).
        d_min: Smallest depth value over the whole clip.
        d_range: ``d_max - d_min`` over the whole clip.
        cmap: Matplotlib colormap used for the color rendering.
        grayscale: Emit gray values instead of colormap colors.
        convert_from_color: With ``grayscale``, derive the gray values from
            the colormapped image instead of the raw normalized depth.
        blur: Gaussian blur strength; values <= 0 disable blurring.

    Returns:
        ``uint8`` array shaped like ``depth_frame`` plus a trailing axis of 3.
    """
    if d_range > 0:
        # Normalize to [0, 255] using clip-wide extrema so brightness stays
        # consistent from frame to frame.
        depth_norm = ((depth_frame - d_min) / d_range * 255).astype(np.uint8)
    else:
        # Every depth value in the clip is identical: dividing by the zero
        # range would produce NaNs, so render a flat image instead.
        depth_norm = np.zeros_like(depth_frame, dtype=np.uint8)

    if grayscale and not convert_from_color:
        # Gray straight from the normalized depth values.
        depth_vis = np.stack([depth_norm] * 3, axis=-1)
    else:
        # Color rendering via the colormap (alpha channel dropped).
        depth_vis = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
        if grayscale:
            # Gray derived from the color rendering, replicated to 3 channels.
            depth_gray = cv2.cvtColor(depth_vis, cv2.COLOR_RGB2GRAY)
            depth_vis = np.stack([depth_gray] * 3, axis=-1)

    if blur > 0:
        kernel_size = int(blur * 20) * 2 + 1  # Always odd, as GaussianBlur requires.
        depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)
    return depth_vis


def _mux_source_audio(video_path, audio_source_path, temp_path):
    """Best-effort copy of the source's first audio track into ``video_path``.

    ffmpeg writes to ``temp_path`` first; ``video_path`` is only replaced when
    ffmpeg succeeded, so a failed merge leaves the silent video intact.

    Returns:
        True when ``video_path`` was replaced by the muxed file, else False.
    """
    cmd = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-i", audio_source_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0?",  # Trailing '?' makes the audio stream optional.
        "-shortest",
        temp_path,
    ]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as err:
        # ffmpeg is not installed or not executable.
        print(f"Audio merge skipped: could not run ffmpeg ({err})")
        return False

    if result.returncode != 0 or not os.path.exists(temp_path):
        if os.path.exists(temp_path):
            os.remove(temp_path)  # Drop a partial output file.
        details = result.stderr.decode(errors="replace")[-500:]
        print(f"Audio merge skipped: ffmpeg exited with code {result.returncode}\n{details}")
        return False

    os.replace(temp_path, video_path)
    return True


def infer_video_depth(
    input_video: str,
    max_len: int = -1,
    # NOTE(review): the next three parameters sit on lines that the diff view
    # elides. Their names and order follow the function body and the Gradio
    # `inputs` lists; the defaults are assumed from the example rows --
    # confirm against the real app.py.
    target_fps: int = -1,
    max_res: int = 1280,
    stitch: bool = True,
    grayscale: bool = True,
    convert_from_color: bool = True,
    blur: float = 0.3,
    output_dir: str = './outputs',
    input_size: int = 518,
):
    """Run depth inference on a video and write the result videos.

    Writes ``<name>_src.mp4`` (the frames fed to the model) and
    ``<name>_vis.mp4`` (depth visualization) into ``output_dir``. When
    ``stitch`` is set, also writes ``<name[:20]>_RGBD.mp4``: each frame read
    with ``max_res=-1`` on the left, the depth rendering resized to match on
    the right, with the source audio merged in when ffmpeg can do so.

    Args:
        input_video: Path of the input video.
        max_len: Forwarded to ``read_video_frames``.
        target_fps: Forwarded to ``read_video_frames``.
        max_res: Forwarded to ``read_video_frames`` for the inference frames.
        stitch: Whether to create the side-by-side RGBD video.
        grayscale: Render the stitched depth half in gray instead of color.
        convert_from_color: With ``grayscale``, derive gray from the
            colormapped depth instead of the raw normalized depth.
        blur: Gaussian blur strength for the stitched depth half.
        output_dir: Directory for all output files (created if missing).
        input_size: Forwarded to the model's ``infer_video_depth``.

    Returns:
        ``[processed_video_path, depth_vis_path, stitched_video_path]``; the
        last entry is ``None`` when ``stitch`` is false.
    """
    # 1. Read the frames used for inference.
    frames, target_fps = read_video_frames(input_video, max_len, target_fps, max_res)
    # 2. Run the depth model.
    depths, fps = video_depth_anything.infer_video_depth(frames, target_fps, input_size=input_size, device=DEVICE)

    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(input_video))[0]

    # Save the preprocessed (RGB) video and the generated depth visualization.
    processed_video_path = os.path.join(output_dir, base_name + '_src.mp4')
    depth_vis_path = os.path.join(output_dir, base_name + '_vis.mp4')
    save_video(frames, processed_video_path, fps=fps)
    save_video(depths, depth_vis_path, fps=fps, is_depths=True)

    stitched_video_path = None
    if stitch:
        # Re-read the source with max_res=-1 for the RGB half of the stitch.
        full_frames, _ = read_video_frames(input_video, max_len, target_fps, max_res=-1)

        d_min, d_max = depths.min(), depths.max()
        d_range = d_max - d_min
        # Loop-invariant: look the colormap up once instead of per frame.
        cmap = matplotlib.colormaps.get_cmap("inferno")

        stitched_frames = []
        # zip stops at the shorter sequence, matching the original min(len, len) bound.
        for rgb_full, depth_frame in zip(full_frames, depths):
            depth_vis = _depth_frame_to_vis(
                depth_frame, d_min, d_range, cmap, grayscale, convert_from_color, blur
            )
            # Resize the depth rendering to the RGB frame, then place RGB left, depth right.
            full_height, full_width = rgb_full.shape[:2]
            depth_vis_resized = cv2.resize(depth_vis, (full_width, full_height))
            stitched_frames.append(cv2.hconcat([rgb_full, depth_vis_resized]))
        stitched_frames = np.array(stitched_frames)

        # Only the first 20 characters of the base name are used for the RGBD file.
        short_name = base_name[:20]
        stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
        save_video(stitched_frames, stitched_video_path, fps=fps)

        # Merge the input's audio into the stitched video. A failed merge keeps
        # the silent video rather than discarding the finished render.
        temp_audio_path = os.path.join(output_dir, short_name + '_RGBD_audio.mp4')
        _mux_source_audio(stitched_video_path, input_video, temp_audio_path)

    gc.collect()
    torch.cuda.empty_cache()

    # Preprocessed RGB video, depth visualization, and (if created) the stitched video.
    return [processed_video_path, depth_vis_path, stitched_video_path]
157
 
158
  def construct_demo():
159
  with gr.Blocks(analytics_enabled=False) as demo:
160
  gr.Markdown(title)
161
  gr.Markdown(description)
162
  gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")
163
+
164
  with gr.Row(equal_height=True):
165
  with gr.Column(scale=1):
166
+ # Video input component for file upload.
167
  input_video = gr.Video(label="Input Video")
168
  with gr.Column(scale=2):
169
  with gr.Row(equal_height=True):
170
+ processed_video = gr.Video(label="Preprocessed Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
171
+ depth_vis_video = gr.Video(label="Generated Depth Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
172
+ stitched_video = gr.Video(label="Stitched RGBD Video", interactive=False, autoplay=True, loop=True, show_share_button=True, scale=5)
173
+
174
  with gr.Row(equal_height=True):
175
  with gr.Column(scale=1):
176
  with gr.Accordion("Advanced Settings", open=False):
 
181
  grayscale_option = gr.Checkbox(label="Output Depth as Grayscale", value=True)
182
  convert_from_color_option = gr.Checkbox(label="Convert Grayscale from Color", value=True)
183
  blur_slider = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur (can reduce edge artifacts on display)", value=0.3)
 
 
184
  generate_btn = gr.Button("Generate")
185
  with gr.Column(scale=2):
186
  pass
187
+
188
  gr.Examples(
189
  examples=examples,
190
+ inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, convert_from_color_option, blur_slider],
191
  outputs=[processed_video, depth_vis_video, stitched_video],
192
  fn=infer_video_depth,
193
  cache_examples=False,
194
  cache_mode="lazy",
195
  )
196
+
 
 
 
197
  generate_btn.click(
198
  fn=infer_video_depth,
199
+ inputs=[input_video, max_len, target_fps, max_res, stitch_option, grayscale_option, convert_from_color_option, blur_slider],
200
  outputs=[processed_video, depth_vis_video, stitched_video],
201
  )
202
+
203
  return demo
204
 
if __name__ == "__main__":
    # Build the Blocks UI, turn on Gradio's request queue, then start the
    # server with a public share link.
    demo = construct_demo()
    demo.queue()
    demo.launch(share=True)