pablodawson committed on
Commit
65acb9b
·
1 Parent(s): fd28d2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -43
app.py CHANGED
@@ -14,7 +14,7 @@ import tempfile
14
  from mesh import get_mesh
15
 
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
-
18
  # Inpainting pipeline
19
 
20
 
@@ -26,39 +26,57 @@ model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest i
26
  #model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
27
  #model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
28
 
29
# Fetch the MiDaS network selected by `model_type` from torch.hub, move it to
# the chosen device and switch it to eval mode.
midas = torch.hub.load("intel-isl/MiDaS", model_type)
midas.to(device)
midas.eval()

# The preprocessing transforms come from the same hub repository; the DPT
# variants use the DPT transform, every other model type uses the small one.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
transform = (
    midas_transforms.dpt_transform
    if model_type in ("DPT_Large", "DPT_Hybrid")
    else midas_transforms.small_transform
)
 
 
 
40
 
 
 
41
 
42
def estimate_depth(image):
    """Predict a depth map for ``image`` with the module-level MiDaS model.

    Args:
        image: Input picture; it is passed to ``transform`` and its
            ``shape[:2]`` is used as the output size (presumably an
            H x W x C array -- TODO confirm against callers).

    Returns:
        A PIL image holding the prediction rescaled linearly to the
        0..65535 range and stored as int32 values.
    """
    input_batch = transform(image).to(device)

    with torch.no_grad():
        prediction = midas(input_batch)

        # Resample the network output back to the input resolution.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=image.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    output = prediction.cpu().numpy()

    lowest = np.min(output)
    span = np.max(output) - lowest
    if span > 0:
        output = 65535 * (output - lowest) / span
    else:
        # A perfectly flat prediction would otherwise divide by zero and
        # push NaNs through the int32 cast below.
        output = np.zeros_like(output)

    return Image.fromarray(output.astype("int32"))
61
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  def read_content(file_path: str) -> str:
64
  """read the content of target file
@@ -69,17 +87,16 @@ def read_content(file_path: str) -> str:
69
  return content
70
 
71
  def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
 
72
  if negative_prompt == "":
73
  negative_prompt = None
74
- scheduler_class_name = scheduler.split("-")[0]
75
 
76
  init_image = cv2.resize(dict["image"], (512, 512))
77
 
78
  mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
79
- mask.save("temp_mask.jpg")
80
 
81
  if (depth is None):
82
- depth_image = estimate_depth(init_image)
83
 
84
  else:
85
  d_i = depth[:,:,0]
@@ -177,6 +194,7 @@ def create_vis_demo():
177
 
178
 
179
 
 
180
  def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
181
  if negative_prompt == "":
182
  negative_prompt = None
@@ -187,7 +205,7 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale
187
  mask.save("temp_mask.jpg")
188
 
189
  if (depth is None):
190
- depth_image = estimate_depth(init_image)
191
 
192
  else:
193
  d_i = depth[:,:,0]
@@ -201,24 +219,13 @@ def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale
201
 
202
  output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
203
 
204
- depth_out = np.array(output.depth[0])
205
-
206
- output_depth_vis = (depth_out - np.min(depth_out)) / (np.max(depth_out) - np.min(depth_out)) * 255
207
- output_depth_vis = output_depth_vis.astype("uint8")
208
-
209
- input_depth = np.array(depth_image)
210
- input_depth_vis = (input_depth - np.min(input_depth)) / (np.max(input_depth) - np.min(input_depth)) * 255
211
- input_depth_vis = input_depth_vis.astype("uint8")
212
-
213
- #init_image
214
- #depth_image
215
  output_image = output.rgb[0]
216
 
217
- mesh_depth = output_depth_vis.max() - output_depth_vis
218
- output_mesh = get_mesh(mesh_depth, output_image, keep_edges=keep_edges, skew=1)
219
-
220
- depth_image_mesh = input_depth_vis.max() - input_depth_vis
221
- input_mesh = get_mesh(depth_image_mesh,init_image, keep_edges=keep_edges, skew=1)
222
 
223
  return input_mesh, output_mesh, gr.update(visible=True)
224
 
 
14
  from mesh import get_mesh
15
 
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
+ model_arch = "zoe"
18
  # Inpainting pipeline
19
 
20
 
 
26
  #model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
27
  #model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
28
 
29
if model_arch == "midas":
    # MiDaS backend: fetch the network chosen by `model_type` from torch.hub,
    # move it to the target device and switch it to eval mode.
    midas = torch.hub.load("intel-isl/MiDaS", model_type)
    midas.to(device)
    midas.eval()

    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

    # The DPT variants use the DPT preprocessing; every other model type
    # falls back to the small-model transform.
    if model_type in ("DPT_Large", "DPT_Hybrid"):
        transform = midas_transforms.dpt_transform
    else:
        transform = midas_transforms.small_transform

    def estimate_depth(image):
        """Predict a depth map for ``image`` with MiDaS.

        Args:
            image: Input picture; it is passed to ``transform`` and its
                ``shape[:2]`` is used as the output size (presumably an
                H x W x C array -- TODO confirm against callers).

        Returns:
            A 3-tuple ``(depth_image, low, high)``: a PIL image holding the
            prediction rescaled linearly to 0..65535 (int32 values), followed
            by the minimum and maximum of the raw prediction before
            rescaling.
        """
        input_batch = transform(image).to(device)

        with torch.no_grad():
            prediction = midas(input_batch)

            # Resample the network output back to the input resolution.
            prediction = torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze()

        raw = prediction.cpu().numpy()

        # Capture the range BEFORE rescaling. Taking min/max of the rescaled
        # array would always report ~0 and ~65535 and would disagree with
        # the ZoeDepth branch, which reports the raw range.
        low, high = raw.min(), raw.max()
        span = high - low
        if span > 0:
            scaled = 65535 * (raw - low) / span
        else:
            # Flat prediction: avoid 0/0 producing NaNs in the int32 cast.
            scaled = np.zeros_like(raw)

        return Image.fromarray(scaled.astype("int32")), low, high

elif model_arch == "zoe":
    # ZoeDepth backend (ZoeD_N variant) loaded through torch.hub.
    repo = "isl-org/ZoeDepth"
    model_zoe_n = torch.hub.load(repo, "ZoeD_N", pretrained=True)
    zoe = model_zoe_n.to(device)

    def estimate_depth(image):
        """Predict a depth map for ``image`` with ZoeDepth.

        Args:
            image: Input picture handed to ``zoe.infer_pil`` unchanged.

        Returns:
            A 3-tuple ``(depth_image, low, high)``: a PIL image holding the
            prediction rescaled to 0..65535 and inverted (the smallest raw
            value maps to 65535, the largest to 0; int32 values), followed by
            the minimum and maximum of the raw prediction.
        """
        depth_tensor = zoe.infer_pil(image, output_type="tensor")
        raw = depth_tensor.cpu().numpy()

        low, high = raw.min(), raw.max()
        span = high - low
        if span > 0:
            inverted = 65535 * (1 - (raw - low) / span)
        else:
            # Flat prediction: every pixel sits at the minimum, which the
            # inverted mapping sends to 65535; this also avoids 0/0.
            inverted = np.full_like(raw, 65535)

        return Image.fromarray(inverted.astype("int32")), low, high
76
+
77
def denormalize(image, max, min):
    """Map values from the 0..65535 scale back onto the ``min``/``max`` range.

    Evaluates ``(image / 65535 - 1) * (min - max) + min``, so an input of
    65535 yields ``min`` and an input of 0 yields ``max``.

    Args:
        image: Scalar or array on the 0..65535 scale.
        max: Value produced for an input of 0.
        min: Value produced for an input of 65535.

    Returns:
        The rescaled value(s), same shape as ``image``.

    NOTE(review): the parameter order is ``(image, max, min)``, but the calls
    visible in this file pass ``(array, min, max)`` positionally, which binds
    the smaller value to ``max``. Confirm whether that mirrored result is
    intended. The parameter names also shadow the builtins; they are kept so
    existing callers stay valid.
    """
    offset_from_top = image / 65535 - 1
    return offset_from_top * (min - max) + min
80
 
81
  def read_content(file_path: str) -> str:
82
  """read the content of target file
 
87
  return content
88
 
89
  def predict_images(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler"):
90
+
91
  if negative_prompt == "":
92
  negative_prompt = None
 
93
 
94
  init_image = cv2.resize(dict["image"], (512, 512))
95
 
96
  mask = Image.fromarray(cv2.resize(dict["mask"], (512, 512))[:,:,0])
 
97
 
98
  if (depth is None):
99
+ depth_image, _, _ = estimate_depth(init_image)
100
 
101
  else:
102
  d_i = depth[:,:,0]
 
194
 
195
 
196
 
197
+
198
  def predict_images_3d(dict, depth, prompt="", negative_prompt="", guidance_scale=7.5, steps=20, strength=1.0, scheduler="EulerDiscreteScheduler", keep_edges=False):
199
  if negative_prompt == "":
200
  negative_prompt = None
 
205
  mask.save("temp_mask.jpg")
206
 
207
  if (depth is None):
208
+ depth_image, min, max = estimate_depth(init_image)
209
 
210
  else:
211
  d_i = depth[:,:,0]
 
219
 
220
  output = pipe(prompt = prompt, negative_prompt=negative_prompt, image=init_image, mask_image=mask, depth_image=depth_image, guidance_scale=guidance_scale, num_inference_steps=int(steps), strength=strength)
221
 
222
+ depth_in = denormalize(np.array(depth_image), min, max)
223
+ depth_out = denormalize(np.array(output.depth[0]), min, max)
224
+
 
 
 
 
 
 
 
 
225
  output_image = output.rgb[0]
226
 
227
+ input_mesh = get_mesh(depth_in,init_image, keep_edges=keep_edges)
228
+ output_mesh = get_mesh(depth_out, output_image, keep_edges=keep_edges)
 
 
 
229
 
230
  return input_mesh, output_mesh, gr.update(visible=True)
231