Update app.py
Commit · 65acb9b · Parent(s): fd28d2d

app.py CHANGED
@@ -14,7 +14,7 @@ import tempfile
 from mesh import get_mesh
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-
+model_arch = "zoe"
 # Inpainting pipeline
 
 
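This commit swaps the app's depth estimator from MiDaS to ZoeDepth behind a new model_arch flag. The hunk below wraps the existing MiDaS setup in a model_arch == "midas" branch, adds a ZoeDepth branch loaded from the isl-org/ZoeDepth hub repo, and changes estimate_depth to also return the raw depth minimum and maximum so that the 16-bit normalization can be undone later by the new denormalize helper (see the notes after the hunk).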
@@ -26,39 +26,57 @@ model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
 #model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
 #model_type = "MiDaS_small"  # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
 
-midas = torch.hub.load("intel-isl/MiDaS", model_type)
-
-midas.to(device)
-midas.eval()
-
-midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
-
-if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
-    transform = midas_transforms.dpt_transform
-else:
-    transform = midas_transforms.small_transform
-
-def estimate_depth(image):
-
-    input_batch = transform(image).to(device)
-
-    with torch.no_grad():
-        prediction = midas(input_batch)
-
-    prediction = torch.nn.functional.interpolate(
-        prediction.unsqueeze(1),
-        size=image.shape[:2],
-        mode="bicubic",
-        align_corners=False,
-    ).squeeze()
-
-    output = prediction.cpu().numpy()
-
-    output = 65535 * (output - np.min(output))/(np.max(output) - np.min(output))
-
-    return Image.fromarray(output.astype("int32"))
+if model_arch == "midas":
+    midas = torch.hub.load("intel-isl/MiDaS", model_type)
+
+    midas.to(device)
+    midas.eval()
+
+    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+
+    if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
+        transform = midas_transforms.dpt_transform
+    else:
+        transform = midas_transforms.small_transform
+
+    def estimate_depth(image):
+        input_batch = transform(image).to(device)
+
+        with torch.no_grad():
+            prediction = midas(input_batch)
+
+        prediction = torch.nn.functional.interpolate(
+            prediction.unsqueeze(1),
+            size=image.shape[:2],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+
+        output = prediction.cpu().numpy()
+
+        output= 65535 * (output - np.min(output))/(np.max(output) - np.min(output))
+
+        return Image.fromarray(output.astype("int32")), output.min(), output.max()
+
+elif model_arch == "zoe":
+    # Zoe_N
+    repo = "isl-org/ZoeDepth"
+    model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
+    zoe = model_zoe_n.to(device)
+
+    def estimate_depth(image):
+
+        depth_tensor = zoe.infer_pil(image, output_type="tensor")
+        output = depth_tensor.cpu().numpy()
+
+        output_ = 65535 * (1 - (output - np.min(output))/(np.max(output) - np.min(output)))
+
+        return Image.fromarray(output_.astype("int32")), output.min(), output.max()
+
+def denormalize(image, max, min):
+    image = (image / 65535 - 1 ) * (min - max) + min
+    return image
 
 
 def read_content(file_path: str) -> str:
     """read the content of target file
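Two details of the rewritten depth code are worth annotating. First, the MiDaS branch resizes the raw prediction back to the input resolution before normalizing. A minimal shape walk-through of that step (a sketch; the 384x384 raw output and 512x512 input sizes are assumed for illustration):

    import torch

    # MiDaS returns (batch, H', W'); interpolate() needs a channel dim, so the
    # code unsqueezes to (batch, 1, H', W'), resizes, then squeezes back to (H, W).
    prediction = torch.rand(1, 384, 384)
    resized = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),      # (1, 1, 384, 384)
        size=(512, 512),              # image.shape[:2] in the app code
        mode="bicubic",
        align_corners=False,
    ).squeeze()                       # (512, 512)
    assert resized.shape == (512, 512)

Second, the zoe branch encodes depth inverted into a 16-bit range (near maps to 65535, far to 0), and denormalize is its exact algebraic inverse when the bounds are supplied in the declared (max, min) order. A quick numpy round-trip check (a sketch with a fake depth map; the formula is inlined from denormalize above):

    import numpy as np

    d = np.random.rand(8, 8) * 10 + 0.5                        # fake metric depth
    pixels = 65535 * (1 - (d - d.min()) / (d.max() - d.min())) # zoe-branch encoding
    recovered = (pixels / 65535 - 1) * (d.min() - d.max()) + d.min()
    assert np.allclose(recovered, d)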
@@ -69,17 +87,16 @@ def read_content(file_path: str) -> str:
     return content
 
 def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
+
     if negative_prompt == "":
         negative_prompt = None
-    scheduler_class_name = scheduler.split("-")[0]
 
     init_image = cv2.resize(dict["image"], (512, 512))
 
     mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
-    mask.save("temp_mask.jpg")
 
     if (depth is None):
-        depth_image = estimate_depth(init_image)
+        depth_image, _, _ = estimate_depth(init_image)
 
     else:
         d_i = depth[:,:,0]
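estimate_depth now returns a 3-tuple, so predict_images unpacks and discards the bounds it does not need. Note also that both branches build the depth image with astype("int32"), which Pillow maps to 32-bit integer mode "I" even though the 0..65535 values would fit a 16-bit image. A minimal check of that mode behavior:

    import numpy as np
    from PIL import Image

    arr = (65535 * np.random.rand(4, 4)).astype("int32")
    assert Image.fromarray(arr).mode == "I"   # 32-bit signed integer pixels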
@@ -177,6 +194,7 @@ def create_vis_demo():
 
 
 
+
 def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
     if negative_prompt == "":
         negative_prompt = None
@@ -187,7 +205,7 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
     mask.save("temp_mask.jpg")
 
     if (depth is None):
-        depth_image = estimate_depth(init_image)
+        depth_image, min, max = estimate_depth(init_image)
 
     else:
         d_i = depth[:,:,0]
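Unpacking into min and max shadows Python's built-ins for the rest of predict_images_3d. It also means the two names are only bound when depth is None; unless the user-supplied-depth branch (elided from this hunk) assigns them as well, the denormalize calls in the next hunk would receive the built-in functions and fail with a TypeError.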
@@ -201,24 +219,13 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
 
     output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
 
-    output_depth = np.array(output.depth[0])
-
-    output_depth_vis = (output_depth - np.min(output_depth)) / (np.max(output_depth) - np.min(output_depth)) * 255
-    output_depth_vis = output_depth_vis.astype("uint8")
-
-    input_depth = np.array(depth_image)
-    input_depth_vis = (input_depth - np.min(input_depth)) / (np.max(input_depth) - np.min(input_depth)) * 255
-    input_depth_vis = input_depth_vis.astype("uint8")
-
-    #init_image
-    #depth_image
+    depth_in = denormalize(np.array(depth_image), min, max)
+    depth_out = denormalize(np.array(output.depth[0]), min, max)
+
     output_image = output.rgb[0]
 
-
-    output_mesh = get_mesh(
-        output_depth_vis.max() - output_depth_vis, output_image, keep_edges=keep_edges, skew=1)
-    depth_image_mesh = input_depth_vis.max() - input_depth_vis
-    input_mesh = get_mesh(depth_image_mesh,init_image, keep_edges=keep_edges, skew=1)
+    input_mesh = get_mesh(depth_in,init_image, keep_edges=keep_edges)
+    output_mesh = get_mesh(depth_out, output_image, keep_edges=keep_edges)
 
     return input_mesh, output_mesh, gr.update(visible=True)
 
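One further subtlety: denormalize declares its parameters as (image, max, min), but both calls above pass (..., min, max) positionally, so the bounds arrive swapped. Instead of recovering the metric depth d, each call yields min + max - d, a depth map mirrored around its midpoint. That matches the flip the removed code applied explicitly (input_depth_vis.max() - input_depth_vis) before meshing, so it may be intentional, but keyword arguments or distinct names would make it so at a glance. A sketch of the effect, using hypothetical d_min/d_max names to avoid the shadowing:

    import numpy as np

    def denormalize(image, max, min):                # parameter order as committed
        return (image / 65535 - 1) * (min - max) + min

    d = np.random.rand(8, 8) * 10 + 0.5              # fake metric depth
    d_min, d_max = d.min(), d.max()
    pixels = 65535 * (1 - (d - d_min) / (d_max - d_min))

    mirrored = denormalize(pixels, d_min, d_max)     # call-site order: (min, max)
    assert np.allclose(mirrored, d_min + d_max - d)  # mirrored, not recovered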