Update app.py
Commit · 65acb9b · Parent(s): fd28d2d

app.py CHANGED
@@ -14,7 +14,7 @@ import tempfile
 from mesh import get_mesh
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
+model_arch = "zoe"
 # Inpainting pipeline
 
 
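This commit swaps the app's depth estimator from MiDaS to ZoeDepth behind a new model_arch flag. The hunk below wraps the existing MiDaS setup in a model_arch == "midas" branch, adds a ZoeDepth branch loaded from the isl-org/ZoeDepth hub repo, and changes estimate_depth to also return the raw depth minimum and maximum so that the 16-bit normalization can be undone later by the new denormalize helper (see the notes after the hunk).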
@@ -26,39 +26,57 @@ model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
 #model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
 #model_type = "MiDaS_small"  # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
 
-midas = torch.hub.load("intel-isl/MiDaS", model_type)
-
-midas.to(device)
-midas.eval()
-
-midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
-
-if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
-    transform = midas_transforms.dpt_transform
-else:
-    transform = midas_transforms.small_transform
-
-def estimate_depth(image):
-
-    input_batch = transform(image).to(device)
-
-    with torch.no_grad():
-        prediction = midas(input_batch)
-
-    prediction = torch.nn.functional.interpolate(
-        prediction.unsqueeze(1),
-        size=image.shape[:2],
-        mode="bicubic",
-        align_corners=False,
-    ).squeeze()
-
-    output = prediction.cpu().numpy()
-
-    output = 65535 * (output - np.min(output))/(np.max(output) - np.min(output))
-
-    return Image.fromarray(output.astype("int32"))
+if model_arch == "midas":
+    midas = torch.hub.load("intel-isl/MiDaS", model_type)
+
+    midas.to(device)
+    midas.eval()
+
+    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+
+    if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
+        transform = midas_transforms.dpt_transform
+    else:
+        transform = midas_transforms.small_transform
+
+    def estimate_depth(image):
+        input_batch = transform(image).to(device)
+
+        with torch.no_grad():
+            prediction = midas(input_batch)
+
+        prediction = torch.nn.functional.interpolate(
+            prediction.unsqueeze(1),
+            size=image.shape[:2],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+
+        output = prediction.cpu().numpy()
+
+        output= 65535 * (output - np.min(output))/(np.max(output) - np.min(output))
+
+        return Image.fromarray(output.astype("int32")), output.min(), output.max()
+
+elif model_arch == "zoe":
+    # Zoe_N
+    repo = "isl-org/ZoeDepth"
+    model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
+    zoe = model_zoe_n.to(device)
+
+    def estimate_depth(image):
+
+        depth_tensor = zoe.infer_pil(image, output_type="tensor")
+        output = depth_tensor.cpu().numpy()
+
+        output_ = 65535 * (1 - (output - np.min(output))/(np.max(output) - np.min(output)))
+
+        return Image.fromarray(output_.astype("int32")), output.min(), output.max()
+
+def denormalize(image, max, min):
+    image = (image / 65535 - 1 ) * (min - max) + min
+    return image
 
 
 def read_content(file_path: str) -> str:
     """read the content of target file
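Two details of the rewritten depth code are worth annotating. First, the MiDaS branch resizes the raw prediction back to the input resolution before normalizing. A minimal shape walk-through of that step (a sketch; the 384x384 raw output and 512x512 input sizes are assumed for illustration):

    import torch

    # MiDaS returns (batch, H', W'); interpolate() needs a channel dim, so the
    # code unsqueezes to (batch, 1, H', W'), resizes, then squeezes back to (H, W).
    prediction = torch.rand(1, 384, 384)
    resized = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),      # (1, 1, 384, 384)
        size=(512, 512),              # image.shape[:2] in the app code
        mode="bicubic",
        align_corners=False,
    ).squeeze()                       # (512, 512)
    assert resized.shape == (512, 512)

Second, the zoe branch encodes depth inverted into a 16-bit range (near maps to 65535, far to 0), and denormalize is its exact algebraic inverse when the bounds are supplied in the declared (max, min) order. A quick numpy round-trip check (a sketch with a fake depth map; the formula is inlined from denormalize above):

    import numpy as np

    d = np.random.rand(8, 8) * 10 + 0.5                        # fake metric depth
    pixels = 65535 * (1 - (d - d.min()) / (d.max() - d.min())) # zoe-branch encoding
    recovered = (pixels / 65535 - 1) * (d.min() - d.max()) + d.min()
    assert np.allclose(recovered, d)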
@@ -69,17 +87,16 @@ def read_content(file_path: str) -> str:
     return content
 
 def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
+
     if negative_prompt == "":
         negative_prompt = None
-    scheduler_class_name = scheduler.split("-")[0]
 
     init_image = cv2.resize(dict["image"], (512, 512))
 
     mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
-    mask.save("temp_mask.jpg")
 
     if (depth is None):
-        depth_image = estimate_depth(init_image)
+        depth_image, _, _ = estimate_depth(init_image)
 
     else:
         d_i = depth[:,:,0]
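estimate_depth now returns a 3-tuple, so predict_images unpacks and discards the bounds it does not need. Note also that both branches build the depth image with astype("int32"), which Pillow maps to 32-bit integer mode "I" even though the 0..65535 values would fit a 16-bit image. A minimal check of that mode behavior:

    import numpy as np
    from PIL import Image

    arr = (65535 * np.random.rand(4, 4)).astype("int32")
    assert Image.fromarray(arr).mode == "I"   # 32-bit signed integer pixels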
@@ -177,6 +194,7 @@ def create_vis_demo():
 
 
 
+
 def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
     if negative_prompt == "":
         negative_prompt = None
@@ -187,7 +205,7 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
     mask.save("temp_mask.jpg")
 
     if (depth is None):
-        depth_image = estimate_depth(init_image)
+        depth_image, min, max = estimate_depth(init_image)
 
     else:
         d_i = depth[:,:,0]
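Unpacking into min and max shadows Python's built-ins for the rest of predict_images_3d. It also means the two names are only bound when depth is None; unless the user-supplied-depth branch (elided from this hunk) assigns them as well, the denormalize calls in the next hunk would receive the built-in functions and fail with a TypeError.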
@@ -201,24 +219,13 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
 
     output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
 
-    output_depth = np.array(output.depth[0])
-
-    output_depth_vis = (output_depth - np.min(output_depth)) / (np.max(output_depth) - np.min(output_depth)) * 255
-    output_depth_vis = output_depth_vis.astype("uint8")
-
-    input_depth = np.array(depth_image)
-    input_depth_vis = (input_depth - np.min(input_depth)) / (np.max(input_depth) - np.min(input_depth)) * 255
-    input_depth_vis = input_depth_vis.astype("uint8")
-
-    #init_image
-    #depth_image
+    depth_in = denormalize(np.array(depth_image), min, max)
+    depth_out = denormalize(np.array(output.depth[0]), min, max)
+
     output_image = output.rgb[0]
 
-
-    output_mesh = get_mesh(
-        output_depth_vis.max() - output_depth_vis, output_image, keep_edges=keep_edges, skew=1)
-    depth_image_mesh = input_depth_vis.max() - input_depth_vis
-    input_mesh = get_mesh(depth_image_mesh,init_image, keep_edges=keep_edges, skew=1)
+    input_mesh = get_mesh(depth_in,init_image, keep_edges=keep_edges)
+    output_mesh = get_mesh(depth_out, output_image, keep_edges=keep_edges)
 
     return input_mesh, output_mesh, gr.update(visible=True)
 
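One further subtlety: denormalize declares its parameters as (image, max, min), but both calls above pass (..., min, max) positionally, so the bounds arrive swapped. Instead of recovering the metric depth d, each call yields min + max - d, a depth map mirrored around its midpoint. That matches the flip the removed code applied explicitly (input_depth_vis.max() - input_depth_vis) before meshing, so it may be intentional, but keyword arguments or distinct names would make it so at a glance. A sketch of the effect, using hypothetical d_min/d_max names to avoid the shadowing:

    import numpy as np

    def denormalize(image, max, min):                # parameter order as committed
        return (image / 65535 - 1) * (min - max) + min

    d = np.random.rand(8, 8) * 10 + 0.5              # fake metric depth
    d_min, d_max = d.min(), d.max()
    pixels = 65535 * (1 - (d - d_min) / (d_max - d_min))

    mirrored = denormalize(pixels, d_min, d_max)     # call-site order: (min, max)
    assert np.allclose(mirrored, d_min + d_max - d)  # mirrored, not recovered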