mtwohey2 committed on
Commit
6bfa5c9
·
verified ·
1 Parent(s): de898e9

Update app.py

Browse files

Lower memory usage by processing the videos one frame at a time instead of loading every frame into memory.

Files changed (1) hide show
  1. app.py +92 -41
app.py CHANGED
@@ -33,36 +33,85 @@ def stitch_rgbd_videos(
33
 
34
  stitched_video_path = None
35
  if stitch:
36
- # Ensure target_fps is valid (positive) or use original fps
37
- safe_target_fps = max(1, target_fps) if target_fps > 0 else -1
 
38
 
39
- # For stitching: read the original video in full resolution (without downscaling).
40
- full_frames, original_fps = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
41
- depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)
42
-
43
- # Use original_fps if target_fps wasn't specified
 
44
  if target_fps <= 0:
45
  target_fps = original_fps
46
 
47
- print(f"Video fps: {original_fps}, target fps: {target_fps}")
48
- print(f"Depth frame shape: {depths[0].shape if len(depths) > 0 else 'No frames'}, "
49
- f"dtype: {depths[0].dtype if len(depths) > 0 else 'N/A'}, "
50
- f"min: {depths.min() if len(depths) > 0 else 'N/A'}, "
51
- f"max: {depths.max() if len(depths) > 0 else 'N/A'}")
52
-
53
- if len(depths) == 0 or len(full_frames) == 0:
54
- print("Error: No frames to process in either RGB or depth video")
 
 
 
 
 
 
 
 
 
 
55
  return None
56
 
57
- # For each frame, create a visual depth image from the inferenced depths.
58
- d_min, d_max = np.min(depths), np.max(depths)
59
- print(f"Depth range: min={d_min}, max={d_max}, diff={d_max-d_min}")
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- stitched_frames = []
62
- for i in range(min(len(full_frames), len(depths))):
63
- rgb_full = full_frames[i] # Full-resolution RGB frame.
64
- depth_frame = depths[i] # Already in uint8 format
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  if grayscale:
67
  if convert_from_color:
68
  # Convert to grayscale if it's a color image
@@ -72,7 +121,7 @@ def stitch_rgbd_videos(
72
  # Assume it's already the right format
73
  depth_vis = depth_frame
74
  else:
75
- if depth_frame.max() > 0: # Ensure we have valid depth data
76
  # Use the inferno colormap if requested
77
  cmap = matplotlib.colormaps.get_cmap("inferno")
78
  # Convert to single channel first
@@ -84,7 +133,6 @@ def stitch_rgbd_videos(
84
  else:
85
  # If zero depth, just use the original
86
  depth_vis = depth_frame
87
-
88
 
89
  # Apply Gaussian blur if requested
90
  if blur > 0:
@@ -97,26 +145,27 @@ def stitch_rgbd_videos(
97
  depth_vis_resized = cv2.resize(depth_vis, (W_full, H_full))
98
  depth_vis_resized = depth_vis_resized.astype(np.uint8) # Ensure uint8
99
 
100
- if len(depth_vis_resized.shape) == 2:
101
- depth_vis_resized = cv2.cvtColor(depth_vis_resized, cv2.COLOR_GRAY2BGR)
102
 
103
- # Ensure both are the same type (commonly uint8):
104
- if rgb_full.dtype != depth_vis_resized.dtype:
105
- depth_vis_resized = depth_vis_resized.astype(rgb_full.dtype)
106
 
107
- # Now safely concatenate.
108
- stitched = cv2.hconcat([rgb_full, depth_vis_resized])
109
- stitched_frames.append(stitched)
110
-
111
- del rgb_full, depth_vis_resized, stitched
112
- gc.collect() # Force Python to free unused memory
113
 
114
- stitched_frames = np.array(stitched_frames)
115
- # Use only the first 20 characters of the base name for the output filename and append '_RGBD.mp4'
116
- base_name = os.path.splitext(video_name)[0]
117
- short_name = base_name[:20]
118
- stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
119
- save_video(stitched_frames, stitched_video_path, fps=target_fps)
 
 
 
 
 
 
120
 
121
  # Merge audio from the input video into the stitched video using ffmpeg.
122
  temp_audio_path = stitched_video_path.replace('_RGBD.mp4', '_RGBD_audio.mp4')
@@ -134,6 +183,8 @@ def stitch_rgbd_videos(
134
  ]
135
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
136
  os.replace(temp_audio_path, stitched_video_path)
 
 
137
 
138
  # Return stitched video.
139
  return stitched_video_path
 
33
 
34
  stitched_video_path = None
35
  if stitch:
36
+ # Process videos frame by frame
37
+ cap_rgb = cv2.VideoCapture(processed_video)
38
+ cap_depth = cv2.VideoCapture(depth_vis_video)
39
 
40
+ if not cap_rgb.isOpened() or not cap_depth.isOpened():
41
+ print("Error: Could not open one or both videos")
42
+ return None
43
+
44
+ # Get video properties
45
+ original_fps = cap_rgb.get(cv2.CAP_PROP_FPS)
46
  if target_fps <= 0:
47
  target_fps = original_fps
48
 
49
+ # Calculate stride for frame skipping
50
+ stride = max(round(original_fps / target_fps), 1) if target_fps > 0 else 1
51
+
52
+ # Get frame counts for progress reporting
53
+ total_frames_rgb = int(cap_rgb.get(cv2.CAP_PROP_FRAME_COUNT))
54
+ print(f"Video fps: {original_fps}, target fps: {target_fps}, total frames: {total_frames_rgb}")
55
+
56
+ # Set up video writer
57
+ base_name = os.path.splitext(video_name)[0]
58
+ short_name = base_name[:20]
59
+ stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
60
+
61
+ # Get first frame to determine dimensions
62
+ ret_rgb, first_frame_rgb = cap_rgb.read()
63
+ ret_depth, first_frame_depth = cap_depth.read()
64
+
65
+ if not ret_rgb or not ret_depth:
66
+ print("Error: Could not read first frame from one or both videos")
67
  return None
68
 
69
+ # Reset video captures
70
+ cap_rgb.set(cv2.CAP_PROP_POS_FRAMES, 0)
71
+ cap_depth.set(cv2.CAP_PROP_POS_FRAMES, 0)
72
+
73
+ # Get output dimensions
74
+ H_full, W_full = first_frame_rgb.shape[:2]
75
+ output_width = W_full * 2 # RGB and depth side by side
76
+ output_height = H_full
77
+
78
+ # Initialize video writer
79
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
80
+ out = cv2.VideoWriter(stitched_video_path, fourcc, target_fps, (output_width, output_height))
81
+
82
+ # Process frames one by one
83
+ frame_count = 0
84
+ processed_count = 0
85
 
86
+ while True:
87
+ # Read frames
88
+ ret_rgb, rgb_full = cap_rgb.read()
89
+ ret_depth, depth_frame = cap_depth.read()
90
 
91
+ # Break if either video ends
92
+ if not ret_rgb or not ret_depth:
93
+ break
94
+
95
+ # Skip frames based on stride
96
+ frame_count += 1
97
+ if frame_count % stride != 0:
98
+ continue
99
+
100
+ processed_count += 1
101
+
102
+ # Set max_len limit if specified
103
+ if max_len > 0 and processed_count > max_len:
104
+ break
105
+
106
+ # Process RGB frame - resize if max_res is specified
107
+ if max_res > 0:
108
+ h, w = rgb_full.shape[:2]
109
+ if max(h, w) > max_res:
110
+ scale = max_res / max(h, w)
111
+ new_h, new_w = int(h * scale), int(w * scale)
112
+ rgb_full = cv2.resize(rgb_full, (new_w, new_h))
113
+
114
+ # Process depth frame based on settings (assuming always 3-channel)
115
  if grayscale:
116
  if convert_from_color:
117
  # Convert to grayscale if it's a color image
 
121
  # Assume it's already the right format
122
  depth_vis = depth_frame
123
  else:
124
+ if np.max(depth_frame) > 0: # Ensure we have valid depth data
125
  # Use the inferno colormap if requested
126
  cmap = matplotlib.colormaps.get_cmap("inferno")
127
  # Convert to single channel first
 
133
  else:
134
  # If zero depth, just use the original
135
  depth_vis = depth_frame
 
136
 
137
  # Apply Gaussian blur if requested
138
  if blur > 0:
 
145
  depth_vis_resized = cv2.resize(depth_vis, (W_full, H_full))
146
  depth_vis_resized = depth_vis_resized.astype(np.uint8) # Ensure uint8
147
 
148
+ # Concatenate frames
149
+ stitched = cv2.hconcat([rgb_full, depth_vis_resized])
150
 
151
+ # Write frame
152
+ out.write(stitched)
 
153
 
154
+ # Free memory
155
+ del rgb_full, depth_vis, depth_vis_resized, stitched
 
 
 
 
156
 
157
+ # Progress report
158
+ if processed_count % 10 == 0:
159
+ print(f"Processed {processed_count} frames...")
160
+
161
+ # Force garbage collection periodically
162
+ if processed_count % 50 == 0:
163
+ gc.collect()
164
+
165
+ # Release resources
166
+ cap_rgb.release()
167
+ cap_depth.release()
168
+ out.release()
169
 
170
  # Merge audio from the input video into the stitched video using ffmpeg.
171
  temp_audio_path = stitched_video_path.replace('_RGBD.mp4', '_RGBD_audio.mp4')
 
183
  ]
184
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
185
  os.replace(temp_audio_path, stitched_video_path)
186
+
187
+ print(f"Completed processing {processed_count} frames")
188
 
189
  # Return stitched video.
190
  return stitched_video_path