Update app.py
app.py CHANGED
@@ -43,6 +43,7 @@ dcolor = []
 pcolors = []
 frame_selected = 0
 frames = []
+backups = []
 depths = []
 masks = []
 locations = []
@@ -75,7 +76,7 @@ def predict_depth(image, model):
 #def predict_depth(model, image):
 #    return model(image)["depth"]
 
-def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin):
+def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data=blurin, o=1, b=32):
     if encoder not in ["vitl","vitb","vits","vitg"]:
         encoder = "vits"
 
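Note: make_video now threads two border knobs through to seg_frame: o (offset, default 1) and b (size, default 32), matching the new slider defaults below. A hypothetical invocation with the new arguments spelled out (the path and blurin value are illustrative only):

make_video("./examples/streetview.mp4", outdir='./vis_video_depth',
           encoder='vits', blur_data=blurin, o=1, b=32)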
@@ -157,6 +158,7 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     n = 0
     depth_frames = []
     orig_frames = []
+    backup_frames = []
     thumbnail_old = []
 
     while raw_video.isOpened():
@@ -189,12 +191,12 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
         #white_lo = np.array([250,250,250])
         #white_hi = np.array([255,255,255])
         # mask image to only select white
-        mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*
+        mask = cv2.inRange(depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]], 250, 255)
         # change image to black where we found white
-        depth_gray[0:int(depth_gray.shape[0]/8*
+        depth_gray[0:int(depth_gray.shape[0]/8*7)-1, 0:depth_gray.shape[1]][mask>0] = 0
 
-        mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*
-        depth_gray[int(depth_gray.shape[0]/8*
+        mask = cv2.inRange(depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]], 192, 255)
+        depth_gray[int(depth_gray.shape[0]/8*7):depth_gray.shape[0], 0:depth_gray.shape[1]][mask>0] = 192
 
         depth_color = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2BGR)
         # split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
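Note: the rewritten masking clears near-white pixels (250-255) in the top 7/8 of the grayscale depth frame and clamps values above 192 in the bottom 1/8. A minimal standalone sketch of the same cv2.inRange pattern, assuming a uint8 depth map (the random array is a stand-in):

import cv2
import numpy as np

depth_gray = np.random.randint(0, 256, (480, 640), dtype=np.uint8)  # stand-in depth map

split = int(depth_gray.shape[0] / 8 * 7)      # boundary between top 7/8 and bottom 1/8
top = depth_gray[0:split - 1, :]              # a view, not a copy: writes hit depth_gray
mask = cv2.inRange(top, 250, 255)             # select near-white (far) pixels
top[mask > 0] = 0                             # push them to black

bottom = depth_gray[split:, :]
mask = cv2.inRange(bottom, 192, 255)          # bright pixels near the frame bottom
bottom[mask > 0] = 192                        # clamp rather than clear

Because NumPy slices are views, the in-place writes mutate depth_gray directly, which is the same effect as the chained indexing in the diff.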
@@ -218,14 +220,19 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
             count += 1
             continue
         thumbnail_old = thumbnail
+
+        blur_frame = blur_image(raw_frame, depth_color, blur_data)
 
-        cv2.imwrite(f"f{count}.png",
+        cv2.imwrite(f"f{count}.png", blur_frame)
         orig_frames.append(f"f{count}.png")
+
+        cv2.imwrite(f"f{count}_.png", blur_frame)
+        backup_frames.append(f"f{count}_.png")
 
         cv2.imwrite(f"f{count}_dmap.png", depth_color)
         depth_frames.append(f"f{count}_dmap.png")
 
-        depth_gray = seg_frame(depth_gray) + 128
+        depth_gray = seg_frame(depth_gray, o, b) + 128
         print(depth_gray[depth_gray>128]-128)
 
         cv2.imwrite(f"f{count}_mask.png", depth_gray)
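Note: each frame is now blurred once and written twice: f{count}.png is the working copy that later mask edits overwrite, and f{count}_.png is an untouched backup that reset_mask restores from. blur_image itself is defined elsewhere in app.py; the sketch below is only a plausible stand-in with the same call shape, assuming a depth-weighted blend (the kernel size and weighting are assumptions, not the app's actual implementation):

import cv2
import numpy as np

def blur_image(frame, depth_color, blur_data, k=15):
    # Hypothetical stand-in for app.py's blur_image: mix a blurred copy with
    # the original, keeping near (bright-depth) pixels sharp. blur_data (the
    # blurin curve) is ignored in this sketch.
    blurred = cv2.GaussianBlur(frame, (k, k), 0)
    w = cv2.cvtColor(depth_color, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
    w = w[..., None]  # broadcast the weight over the color channels
    return (frame * w + blurred * (1.0 - w)).astype(np.uint8)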
@@ -244,7 +251,9 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', blur_data
     global frame_selected
     global depths
     global frames
+    global backups
     frames = orig_frames
+    backups = backup_frames
     depths = depth_frames
 
     if depth_color.shape[0] == 2048: #height
@@ -419,7 +428,7 @@ def show_json(txt):
     return data[0]["video"]["path"], data[1]["path"], data[2], data[3]["background"]["path"], data[4], data[5]
 
 
-def seg_frame(newmask):
+def seg_frame(newmask, b, d):
 
     if newmask.shape[0] == 2048: #height
         gd = cv2.imread('./gradient_large.png', cv2.IMREAD_GRAYSCALE).astype(np.uint8)
@@ -431,8 +440,8 @@ def seg_frame(newmask):
     newmask[np.absolute(newmask.astype(np.int16)-gd.astype(np.int16))<16] = 0
     ret,newmask = cv2.threshold(newmask,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
 
-    b = 1
-    d = 32
+    #b = 1
+    #d = 32
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
     bd = cv2.erode(newmask, element)
     element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
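Note: with the hard-coded b = 1 and d = 32 commented out, the erosion and dilation radii come from seg_frame's new b and d parameters, which make_video forwards from its o and b arguments (the new Offset and Size sliders). The diff only shows the kernel construction and the erode; the subtraction in the sketch below is an assumed continuation of how a border band is typically derived from an erode/dilate pair:

import cv2

def border_band(binary_mask, b=1, d=32):
    # Elliptical structuring elements of radius b (erode) and d (dilate),
    # built the same way as in seg_frame.
    small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * b + 1, 2 * b + 1), (b, b))
    large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2 * d + 1, 2 * d + 1), (d, d))
    eroded = cv2.erode(binary_mask, small)
    dilated = cv2.dilate(binary_mask, large)
    # Pixels inside the dilation but outside the erosion form a border band.
    return cv2.subtract(dilated, eroded)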
@@ -539,44 +548,24 @@ def bincount(a):
     a1D = np.ravel_multi_index(a2D.T, col_range)
     return list(reversed(np.unravel_index(np.bincount(a1D).argmax(), col_range)))
 
-def reset_mask():
+def reset_mask(d):
     global frame_selected
-    global masks
-    global depths
-    global edge
-
-    edge = []
-    mask = cv2.imread(depths[frame_selected]).astype(np.uint8)
-    cv2.imwrite(masks[frame_selected], cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY))
-    return masks[frame_selected], depths
-
-def apply_mask(d,b):
     global frames
-    global
+    global backups
     global masks
     global depths
     global edge
 
     edge = []
-
-
-    mask = mask - 128
-    print(mask[mask>0])
+    backup = cv2.imread(backups[frame_selected]).astype(np.uint8)
+    cv2.imwrite(frames[frame_selected], backup)
 
-
-    mask = np.where((mask==2)|(mask==0),1,0).astype('uint8')
+    d["layers"][0] = (0,0,0,0)
 
-
-    frame[:, :, 3] = mask * 255
-    cv2.imwrite(frames[frame_selected], frame)
+    return gr.ImageEditor(value=d)
 
-    mask = cv2.imread(masks[frame_selected], cv2.IMREAD_GRAYSCALE).astype(np.uint8)
-    mask[mask==128] = 0
-    d["layers"][0] = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGBA)
-
-    return gr.ImageEditor(value=d), depths, frames
 
-def draw_mask(
+def draw_mask(o, b, v, d, evt: gr.EventData):
     global frames
     global depths
     global params
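Note: apply_mask is deleted and reset_mask is simplified: instead of rebuilding a mask from the depth map, it copies the backup frame over the working frame and blanks the editor's first layer. Assigning the bare tuple (0,0,0,0) to d["layers"][0] leans on Gradio coercing it; a more defensive variant (an assumption, using app.py's module globals and treating the layer as an RGBA array) might look like:

import cv2
import numpy as np
import gradio as gr

def reset_mask(d):
    # d is the gr.ImageEditor value dict: {"background", "layers", "composite"}.
    backup = cv2.imread(backups[frame_selected]).astype(np.uint8)
    cv2.imwrite(frames[frame_selected], backup)      # restore the working frame
    if d.get("layers"):
        layer = np.asarray(d["layers"][0])
        d["layers"][0] = np.zeros_like(layer)        # fully transparent layer
    return gr.ImageEditor(value=d)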
@@ -974,16 +963,13 @@ with gr.Blocks(css=css, js=js) as demo:
                     document.getElementById(\"mouse\").getElementsByTagName(\"textarea\")[0].value = \"[]\";
 
                 ' title='Clear selection' style='text-decoration:none;color:white;'>✕ Clear</a>""")
-            apply = gr.Button("Apply", size='sm')
             reset = gr.Button("Reset", size='sm')
-            with gr.Accordion(label="
-
-
-            bsize = gr.Slider(label="Border size", value=15, maximum=256, minimum=1, step=2)
+            with gr.Accordion(label="Border", open=False):
+                boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
+                bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
             mouse = gr.Textbox(elem_id="mouse", value="""[]""", interactive=False)
-            mouse.input(fn=draw_mask, show_progress="minimal", inputs=[
-
-            reset.click(fn=reset_mask, inputs=None, outputs=[output_mask, output_depth])
+            mouse.input(fn=draw_mask, show_progress="minimal", inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
+            reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])
 
             normals_out = gr.Image(label="Normal map", interactive=False)
             format_normals = gr.Radio(choices=["directx", "opengl"])
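Note: the Apply button and the old standalone "Border size" slider are removed; a "Border" accordion now holds Offset and Size sliders, mouse input drives draw_mask with those values, and Reset passes the editor state into reset_mask. A minimal self-contained sketch of this wiring pattern (the handler bodies are placeholders, not app.py's):

import gradio as gr

def draw_mask(offset, size, coords, editor):  # placeholder handler
    return editor

def reset_mask(editor):                       # placeholder handler
    return editor

with gr.Blocks() as demo:
    output_mask = gr.ImageEditor(label="Mask")
    reset = gr.Button("Reset", size='sm')
    with gr.Accordion(label="Border", open=False):
        boffset = gr.Slider(label="Offset", value=1, maximum=256, minimum=0, step=1)
        bsize = gr.Slider(label="Size", value=32, maximum=256, minimum=0, step=1)
    mouse = gr.Textbox(elem_id="mouse", value="[]", interactive=False)
    mouse.input(fn=draw_mask, show_progress="minimal",
                inputs=[boffset, bsize, mouse, output_mask], outputs=[output_mask])
    reset.click(fn=reset_mask, inputs=[output_mask], outputs=[output_mask])

demo.launch()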
@@ -1189,12 +1175,12 @@ with gr.Blocks(css=css, js=js) as demo:
 
         return output_video_path + (json.dumps(locations),)
 
-    submit.click(on_submit, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    submit.click(on_submit, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
     render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
     render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
 
-    example_files = [["./examples/streetview.mp4", "vits", blurin, example_coords]]
-    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
+    example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]
+    examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, blur_in, boffset, bsize, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
 
 
 if __name__ == '__main__':
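Note: because cache_examples=True replays on_submit at startup, the cached example row must match the widened inputs list; the literals 1 and 32 sit in the same positions as the live boffset and bsize inputs:

# Row mirrors inputs=[input_video, model_type, blur_in, boffset, bsize, coords]
example_files = [["./examples/streetview.mp4", "vits", blurin, 1, 32, example_coords]]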