Update app.py
app.py
CHANGED
@@ -43,6 +43,7 @@ dcolor = []
 pcolors = []
 frame_selected = 0
 frames = []
+backups = []
 depths = []
 masks = []
 locations = []
@@ -75,7 +76,7 @@ def predict_depth(image, model):
 #def predict_depth(model, image):
 #    return model(image)["depth"]
 
-def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin):
+def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin, o=1, b=32):
     if encoder not in ["vitl","vitb","vits","vitg"]:
         encoder = "vits"
 
@@ -157,6 +158,7 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     n = 0
     depth_frames = []
     orig_frames = []
+    backup_frames = []
     thumbnail_old = []
 
     while raw_video.isOpened():
@@ -189,12 +191,12 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
             #white_lo = np.array([250,250,250])
             #white_hi = np.array([255,255,255])
             # mask image to only select white
-            mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*…
+            mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]], 250, 255)
             # change image to black where we found white
-            depth_gray[0:int(depth_gray.shape[0]/8*…
+            depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]][mask>0] = 0
 
-            mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*…
-            depth_gray[int(depth_gray.shape[0]/8*…
+            mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]], 192, 255)
+            depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]][mask>0] = 192
 
             depth_color = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2BGR)
             # split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
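Note: the two region-limited `inRange` passes above do what looks like a sky/ground cleanup on the grayscale depth map: near-white pixels in the upper seven eighths are pushed to black (far plane), and the bottom strip is clamped at 192. A minimal sketch of the same pattern, with a random stand-in for `depth_gray`:

```python
import cv2
import numpy as np

depth_gray = np.random.randint(0, 256, (256, 320), dtype=np.uint8)  # stand-in

h, w = depth_gray.shape
cut = int(h / 8 * 7)

top = depth_gray[0:cut - 1, 0:w]      # upper ~7/8 of the frame (a view)
mask = cv2.inRange(top, 250, 255)     # near-white = suspiciously close
top[mask > 0] = 0                     # push those pixels to the far plane

bottom = depth_gray[cut:h, 0:w]       # bottom strip (a view)
mask = cv2.inRange(bottom, 192, 255)  # anything nearer than 192
bottom[mask > 0] = 192                # clamp so the strip can't dominate
```

Because the slices are NumPy views, the masked assignments mutate `depth_gray` in place, exactly as the diffed lines do.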
@@ -218,14 +220,19 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
                 count += 1
                 continue
             thumbnail_old = thumbnail
+
+            blur_frame = blur_image(raw_frame, depth_color, blur_data)
 
-            cv2.imwrite(f"f{count}.png", …
+            cv2.imwrite(f"f{count}.png", blur_frame)
             orig_frames.append(f"f{count}.png")
+
+            cv2.imwrite(f"f{count}_.png", blur_frame)
+            backup_frames.append(f"f{count}_.png")
 
             cv2.imwrite(f"f{count}_dmap.png", depth_color)
             depth_frames.append(f"f{count}_dmap.png")
 
-            depth_gray = seg_frame(depth_gray) + 128
+            depth_gray = seg_frame(depth_gray, o, b) + 128
             print(depth_gray[depth_gray>128]-128)
 
             cv2.imwrite(f"f{count}_mask.png", depth_gray)
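Note: each extracted frame is now written twice, once as the working `f{n}.png` and once as an untouched `f{n}_.png` backup. `blur_image` itself is defined elsewhere in app.py and is not part of this diff, so the sketch below is only a guess at its shape: a depth-weighted blend between the frame and a blurred copy, matching the `(frame, depth, blur_data)` call order used above. The name and `strength` parameter are hypothetical.

```python
import cv2
import numpy as np

def blur_image_sketch(frame, depth_color, strength=5):
    """Hypothetical depth-of-field blur: far (dark) pixels get blurrier."""
    depth = cv2.cvtColor(depth_color, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
    k = 2 * int(strength) + 1                 # GaussianBlur kernel must be odd
    blurred = cv2.GaussianBlur(frame, (k, k), 0)
    alpha = (1.0 - depth)[..., None]          # 0 = near, 1 = far
    return (alpha * blurred + (1.0 - alpha) * frame).astype(np.uint8)
```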
@@ -244,7 +251,9 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     global frame_selected
     global depths
     global frames
+    global backups
     frames = orig_frames
+    backups = backup_frames
     depths = depth_frames
 
     if depth_color.shape[0] == 2048: #height
@@ -419,7 +428,7 @@ def show_json(txt):
     return data[0]["video"]["path"], data[1]["path"], data[2], data[3]["background"]["path"], data[4], data[5]
 
 
-def seg_frame(newmask):
+def seg_frame(newmask, b, d):
 
     if newmask.shape[0] == 2048: #height
         gd = cv2.imread('./gradient_large.png', cv2.IMREAD_GRAYSCALE).astype(np.uint8)
@@ -431,8 +440,8 @@ def seg_frame(newmask):
     newmask[np.absolute(newmask.astype(np.int16)-gd.astype(np.int16))<16] = 0
     ret,newmask = cv2.threshold(newmask,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
 
-    b = 1
-    d = 32
+    #b = 1
+    #d = 32
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
     bd = cv2.erode(newmask, element)
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
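Note: with the hard-coded `b = 1` / `d = 32` commented out, `seg_frame` now scales its morphology to the new Border sliders: a small elliptical erosion carves out sure foreground, and the larger `d` element (presumably feeding a dilate on the next, unshown line) grows a "possible" band around it, much like a GrabCut trimap. The idea in isolation, on a synthetic mask:

```python
import cv2
import numpy as np

newmask = np.zeros((128, 128), dtype=np.uint8)
cv2.circle(newmask, (64, 64), 40, 255, -1)       # stand-in binary mask

b, d = 1, 32                                     # the new slider defaults
el = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
sure_fg = cv2.erode(newmask, el)                 # shrink: certainly object
el = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
maybe_fg = cv2.dilate(newmask, el)               # grow: possibly object
border = cv2.subtract(maybe_fg, sure_fg)         # uncertain ring in between
```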
@@ -539,44 +548,24 @@ def bincount(a):
     a1D = np.ravel_multi_index(a2D.T, col_range)
     return list(reversed(np.unravel_index(np.bincount(a1D).argmax(), col_range)))
 
-def reset_mask():
+def reset_mask(d):
     global frame_selected
-    global masks
-    global depths
-    global edge
-
-    edge = []
-    mask = cv2.imread(depths[frame_selected]).astype(np.uint8)
-    cv2.imwrite(masks[frame_selected], cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY))
-    return masks[frame_selected], depths
-
-def apply_mask(d,b):
     global frames
-    global …
+    global backups
     global masks
     global depths
     global edge
 
     edge = []
-
-
-    mask = mask - 128
-    print(mask[mask>0])
+    backup = cv2.imread(backups[frame_selected]).astype(np.uint8)
+    cv2.imwrite(frames[frame_selected], backup)
 
-
-    mask = np.where((mask==2)|(mask==0),1,0).astype('uint8')
+    d["layers"][0] = (0,0,0,0)
 
-
-    frame[:, :, 3] = mask * 255
-    cv2.imwrite(frames[frame_selected], frame)
+    return gr.ImageEditor(value=d)
 
-    mask = cv2.imread(masks[frame_selected], cv2.IMREAD_GRAYSCALE).astype(np.uint8)
-    mask[mask==128] = 0
-    d["layers"][0] = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGBA)
-
-    return gr.ImageEditor(value=d), depths, frames
 
-def draw_mask(…
+def draw_mask(o, b, v, d, evt: gr.EventData):
     global frames
     global depths
     global params
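Note: this hunk removes `apply_mask` entirely and rewrites `reset_mask` around the new backups: instead of regenerating the mask from the depth map, Reset copies the pristine `f{n}_.png` twin back over the working frame and blanks the editor layer. The pattern in miniature (a sketch, not the app's exact wiring):

```python
import cv2

def reset_frame(frames, backups, idx):
    """Restore frames[idx] from its untouched twin written at extraction time."""
    backup = cv2.imread(backups[idx])   # e.g. "f3_.png"
    cv2.imwrite(frames[idx], backup)    # overwrite the edited "f3.png"
    return frames[idx]
```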
@@ -974,16 +963,13 @@ with gr.Blocks(css=css, js=js) as demo:
                     document.getElementById(\"mouse\").getElementsByTagName(\"textarea\")[0].value = \"[]\";
 
                     ' title='Clear selection' style='text-decoration:none;color:white;'>✕ Clear</a>""")
-                apply = gr.Button("Apply", size='sm')
                 reset = gr.Button("Reset", size='sm')
-                with gr.Accordion(label="…
-
-
-                bsize = gr.Slider(label="Border size", value=15, maximum=256, minimum=1, step=2)
+                with gr.Accordion(label="Border", open=False):
+                    boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
+                    bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
                 mouse = gr.Textbox(elem_id="mouse", value="""[]""", interactive=False)
-                mouse.input(fn=draw_mask, show_progress="minimal", inputs=[…
-
-                reset.click(fn=reset_mask, inputs=None, outputs=[output_mask, output_depth])
+                mouse.input(fn=draw_mask, show_progress="minimal", inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
+                reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])
 
                 normals_out = gr.Image(label="Normal map", interactive=False)
                 format_normals = gr.Radio(choices=["directx", "opengl"])
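Note: the separate Apply button is gone; drawing events now fire `draw_mask` directly with the two new Border sliders, and Reset round-trips only the editor value. A self-contained sketch of this wiring (Gradio 4.x assumed; handler bodies are placeholders for the app's own):

```python
import gradio as gr

def draw_mask(offset, size, points, editor_value, evt: gr.EventData):
    return editor_value   # placeholder: the real handler edits the mask layer

def reset_mask(editor_value):
    return editor_value   # placeholder: the real handler restores the backup

with gr.Blocks() as demo:
    output_mask = gr.ImageEditor(label="Mask")
    reset = gr.Button("Reset", size='sm')
    with gr.Accordion(label="Border", open=False):
        boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
        bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
    mouse = gr.Textbox(elem_id="mouse", value="[]", interactive=False)

    mouse.input(fn=draw_mask, show_progress="minimal",
                inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
    reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])
```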
@@ -1189,12 +1175,12 @@ with gr.Blocks(css=css, js=js) as demo:
 
         return output_video_path + (json.dumps(locations),)
 
-    submit.click(on_submit, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    submit.click(on_submit, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
     render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
     render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
 
-    example_files = [["./examples/streetview.mp4", "vits", blurin, example_coords]]
-    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]
+    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
 
 
 if __name__ == '__main__':
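Note: threading `boffset` and `bsize` through `submit.click` means the `on_submit` handler and every `gr.Examples` row must grow matching positional slots, which is why the cached example gains the literals `1` and `32`. The positional contract, in outline (the `on_submit` signature itself is outside this diff, so the parameter names below are assumptions):

```python
# inputs=[input_video, model_type, blur_in, boffset, bsize, coords] implies:
def on_submit(video_path, encoder, blur_data, boffset, bsize, coords):
    ...

# and each example row must line up with that same order:
example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]
```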