Update app.py
app.py CHANGED
@@ -43,6 +43,7 @@ dcolor = []
 pcolors = []
 frame_selected = 0
 frames = []
+backups = []
 depths = []
 masks = []
 locations = []
@@ -75,7 +76,7 @@ def predict_depth(image, model):
 #def predict_depth(model, image):
 #    return model(image)["depth"]
 
-def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin):
+def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin, o=1, b=32):
     if encoder not in ["vitl","vitb","vits","vitg"]:
         encoder = "vits"
 
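Note: make_video now threads two border knobs through to seg_frame: o (offset, default 1) and b (size, default 32), matching the new slider defaults below. A hypothetical invocation with the new arguments spelled out (the path and blurin value are illustrative only):

make_video("./examples/streetview.mp4", outdir='./vis_video_depth',
           encoder='vits', blur_data=blurin, o=1, b=32)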
@@ -157,6 +158,7 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     n = 0
     depth_frames = []
     orig_frames = []
+    backup_frames = []
     thumbnail_old = []
 
     while raw_video.isOpened():
@@ -189,12 +191,12 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
         #white_lo = np.array([250,250,250])
         #white_hi = np.array([255,255,255])
         # mask image to only select white
-        mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*
+        mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]], 250, 255)
         # change image to black where we found white
-        depth_gray[0:int(depth_gray.shape[0]/8*
+        depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]][mask>0] = 0
 
-        mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*
-        depth_gray[int(depth_gray.shape[0]/8*
+        mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]], 192, 255)
+        depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]][mask>0] = 192
 
         depth_color = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2BGR)
         # split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
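Note: the rewritten masking clears near-white pixels (250-255) in the top 7/8 of the grayscale depth frame and clamps values above 192 in the bottom 1/8. A minimal standalone sketch of the same cv2.inRange pattern, assuming a uint8 depth map (the random array is a stand-in):

import cv2
import numpy as np

depth_gray = np.random.randint(0, 256, (480, 640), dtype=np.uint8)  # stand-in depth map

split = int(depth_gray.shape[0] / 8 * 7)      # boundary between top 7/8 and bottom 1/8
top = depth_gray[0:split - 1, :]              # a view, not a copy: writes hit depth_gray
mask = cv2.inRange(top, 250, 255)             # select near-white (far) pixels
top[mask > 0] = 0                             # push them to black

bottom = depth_gray[split:, :]
mask = cv2.inRange(bottom, 192, 255)          # bright pixels near the frame bottom
bottom[mask > 0] = 192                        # clamp rather than clear

Because NumPy slices are views, the in-place writes mutate depth_gray directly, which is the same effect as the chained indexing in the diff.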
@@ -218,14 +220,19 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
             count += 1
             continue
         thumbnail_old = thumbnail
+
+        blur_frame = blur_image(raw_frame, depth_color, blur_data)
 
-        cv2.imwrite(f"f{count}.png",
+        cv2.imwrite(f"f{count}.png", blur_frame)
         orig_frames.append(f"f{count}.png")
+
+        cv2.imwrite(f"f{count}_.png", blur_frame)
+        backup_frames.append(f"f{count}_.png")
 
         cv2.imwrite(f"f{count}_dmap.png", depth_color)
         depth_frames.append(f"f{count}_dmap.png")
 
-        depth_gray = seg_frame(depth_gray) + 128
+        depth_gray = seg_frame(depth_gray, o, b) + 128
         print(depth_gray[depth_gray>128]-128)
 
         cv2.imwrite(f"f{count}_mask.png", depth_gray)
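Note: each frame is now blurred once and written twice: f{count}.png is the working copy that later mask edits overwrite, and f{count}_.png is an untouched backup that reset_mask restores from. blur_image itself is defined elsewhere in app.py; the sketch below is only a plausible stand-in with the same call shape, assuming a depth-weighted blend (the kernel size and weighting are assumptions, not the app's actual implementation):

import cv2
import numpy as np

def blur_image(frame, depth_color, blur_data, k=15):
    # Hypothetical stand-in for app.py's blur_image: mix a blurred copy with
    # the original, keeping near (bright-depth) pixels sharp. blur_data (the
    # blurin curve) is ignored in this sketch.
    blurred = cv2.GaussianBlur(frame, (k, k), 0)
    w = cv2.cvtColor(depth_color, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
    w = w[..., None]  # broadcast the weight over the color channels
    return (frame * w + blurred * (1.0 - w)).astype(np.uint8)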
@@ -244,7 +251,9 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     global frame_selected
     global depths
     global frames
+    global backups
     frames = orig_frames
+    backups = backup_frames
     depths = depth_frames
 
     if depth_color.shape[0] == 2048: #height
@@ -419,7 +428,7 @@ def show_json(txt):
     return data[0]["video"]["path"], data[1]["path"], data[2], data[3]["background"]["path"], data[4], data[5]
 
 
-def seg_frame(newmask):
+def seg_frame(newmask, b, d):
 
     if newmask.shape[0] == 2048: #height
         gd = cv2.imread('./gradient_large.png', cv2.IMREAD_GRAYSCALE).astype(np.uint8)
@@ -431,8 +440,8 @@ def seg_frame(newmask):
     newmask[np.absolute(newmask.astype(np.int16)-gd.astype(np.int16))<16] = 0
     ret,newmask = cv2.threshold(newmask,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
 
-    b = 1
-    d = 32
+    #b = 1
+    #d = 32
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
     bd = cv2.erode(newmask, element)
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
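Note: with the hard-coded b = 1 and d = 32 commented out, the erosion and dilation radii come from seg_frame's new b and d parameters, which make_video forwards from its o and b arguments (the new Offset and Size sliders). The diff only shows the kernel construction and the erode; the subtraction in the sketch below is an assumed continuation of how a border band is typically derived from an erode/dilate pair:

import cv2

def border_band(binary_mask, b=1, d=32):
    # Elliptical structuring elements of radius b (erode) and d (dilate),
    # built the same way as in seg_frame.
    small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
    large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
    eroded = cv2.erode(binary_mask, small)
    dilated = cv2.dilate(binary_mask, large)
    # Pixels inside the dilation but outside the erosion form a border band.
    return cv2.subtract(dilated, eroded)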
@@ -539,44 +548,24 @@ def bincount(a):
     a1D = np.ravel_multi_index(a2D.T, col_range)
     return list(reversed(np.unravel_index(np.bincount(a1D).argmax(), col_range)))
 
-def reset_mask():
+def reset_mask(d):
     global frame_selected
-    global masks
-    global depths
-    global edge
-
-    edge = []
-    mask = cv2.imread(depths[frame_selected]).astype(np.uint8)
-    cv2.imwrite(masks[frame_selected], cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY))
-    return masks[frame_selected], depths
-
-def apply_mask(d,b):
     global frames
-    global
+    global backups
     global masks
     global depths
     global edge
 
     edge = []
-
-
-    mask = mask - 128
-    print(mask[mask>0])
+    backup = cv2.imread(backups[frame_selected]).astype(np.uint8)
+    cv2.imwrite(frames[frame_selected], backup)
 
-
-    mask = np.where((mask==2)|(mask==0),1,0).astype('uint8')
+    d["layers"][0] = (0,0,0,0)
 
-
-    frame[:, :, 3] = mask * 255
-    cv2.imwrite(frames[frame_selected], frame)
+    return gr.ImageEditor(value=d)
 
-    mask = cv2.imread(masks[frame_selected], cv2.IMREAD_GRAYSCALE).astype(np.uint8)
-    mask[mask==128] = 0
-    d["layers"][0] = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGBA)
-
-    return gr.ImageEditor(value=d), depths, frames
 
-def draw_mask(
+def draw_mask(o, b, v, d, evt: gr.EventData):
     global frames
     global depths
     global params
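Note: apply_mask is deleted and reset_mask is simplified: instead of rebuilding a mask from the depth map, it copies the backup frame over the working frame and blanks the editor's first layer. Assigning the bare tuple (0,0,0,0) to d["layers"][0] leans on Gradio coercing it; a more defensive variant (an assumption, using app.py's module globals and treating the layer as an RGBA array) might look like:

import cv2
import numpy as np
import gradio as gr

def reset_mask(d):
    # d is the gr.ImageEditor value dict: {"background", "layers", "composite"}.
    backup = cv2.imread(backups[frame_selected]).astype(np.uint8)
    cv2.imwrite(frames[frame_selected], backup)      # restore the working frame
    if d.get("layers"):
        layer = np.asarray(d["layers"][0])
        d["layers"][0] = np.zeros_like(layer)        # fully transparent layer
    return gr.ImageEditor(value=d)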
@@ -974,16 +963,13 @@ with gr.Blocks(css=css, js=js) as demo:
                     document.getElementById(\"mouse\").getElementsByTagName(\"textarea\")[0].value = \"[]\";
 
                 ' title='Clear selection' style='text-decoration:none;color:white;'>✕ Clear</a>""")
-            apply = gr.Button("Apply", size='sm')
             reset = gr.Button("Reset", size='sm')
-            with gr.Accordion(label="
-
-
-            bsize = gr.Slider(label="Border size", value=15, maximum=256, minimum=1, step=2)
+            with gr.Accordion(label="Border", open=False):
+                boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
+                bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
             mouse = gr.Textbox(elem_id="mouse", value="""[]""", interactive=False)
-            mouse.input(fn=draw_mask, show_progress="minimal", inputs=[
-
-            reset.click(fn=reset_mask, inputs=None, outputs=[output_mask, output_depth])
+            mouse.input(fn=draw_mask, show_progress="minimal", inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
+            reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])
 
             normals_out = gr.Image(label="Normal map", interactive=False)
             format_normals = gr.Radio(choices=["directx", "opengl"])
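Note: the Apply button and the old standalone "Border size" slider are removed; a "Border" accordion now holds Offset and Size sliders, mouse input drives draw_mask with those values, and Reset passes the editor state into reset_mask. A minimal self-contained sketch of this wiring pattern (the handler bodies are placeholders, not app.py's):

import gradio as gr

def draw_mask(offset, size, coords, editor):  # placeholder handler
    return editor

def reset_mask(editor):                       # placeholder handler
    return editor

with gr.Blocks() as demo:
    output_mask = gr.ImageEditor(label="Mask")
    reset = gr.Button("Reset", size='sm')
    with gr.Accordion(label="Border", open=False):
        boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
        bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
    mouse = gr.Textbox(elem_id="mouse", value="[]", interactive=False)
    mouse.input(fn=draw_mask, show_progress="minimal",
                inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
    reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])

demo.launch()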
@@ -1189,12 +1175,12 @@ with gr.Blocks(css=css, js=js) as demo:
 
         return output_video_path + (json.dumps(locations),)
 
-    submit.click(on_submit, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    submit.click(on_submit, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
     render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
     render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
 
-    example_files = [["./examples/streetview.mp4", "vits", blurin, example_coords]]
-    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]
+    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
 
 
 if __name__ == '__main__':
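Note: because cache_examples=True replays on_submit at startup, the cached example row must match the widened inputs list; the literals 1 and 32 sit in the same positions as the live boffset and bsize inputs:

# Row mirrors inputs=[input_video, model_type, blur_in, boffset, bsize, coords]
example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]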