Surn committed
Commit 6840a20 · 1 Parent(s): 57f215a

Update to 2025 codebase, gradio
.gitignore CHANGED
@@ -43,4 +43,5 @@ workspace.code-workspace
 # log files
 .pnpm-debug.log
 venv/
-*.db-journal
+*.db-journal
+/.vs
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title: Dpt Depth Estimation + 3D
+title: DPT Depth Estimation + 3D
 emoji: ⚡
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 4.26.0
+sdk_version: 5.16.1
 app_file: app.py
 pinned: false
 short_description: Image to 3D with DPT + 3D Point Cloud
app.py CHANGED
@@ -1,119 +1,202 @@
+import os
+from pathlib import Path
+
 import gradio as gr
-from transformers import DPTFeatureExtractor, DPTForDepthEstimation
-import torch
 import numpy as np
-from PIL import Image
 import open3d as o3d
-from pathlib import Path
-import os
+import torch
+from PIL import Image
+from transformers import DPTForDepthEstimation, DPTImageProcessor

-feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+# Initialize the image processor and depth estimation model
+image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")


-def process_image(image_path):
+def process_image(image_path, resized_width=800, z_scale=208):
+    """
+    Processes the input image to generate a depth map and a 3D mesh reconstruction.
+
+    Args:
+        image_path (str): The file path to the input image.
+
+    Returns:
+        list: A list containing the depth image, 3D mesh reconstruction, and GLTF file path.
+    """
     image_path = Path(image_path)
-    image_raw = Image.open(image_path)
-    image = image_raw.resize(
-        (800, int(800 * image_raw.size[1] / image_raw.size[0])),
-        Image.Resampling.LANCZOS,
-    )
+    if not image_path.exists():
+        raise ValueError("Image file not found")
+
+    # Load and resize the image
+    image_raw = Image.open(image_path).convert("RGB")
+    print(f"Original size: {image_raw.size}")
+    resized_height = int(resized_width * image_raw.size[1] / image_raw.size[0])
+    image = image_raw.resize((resized_width, resized_height), Image.Resampling.LANCZOS)
+    print(f"Resized size: {image.size}")

-    # prepare image for the model
-    encoding = feature_extractor(image, return_tensors="pt")
+    # Prepare image for the model
+    encoding = image_processor(image, return_tensors="pt")

-    # forward pass
+    # Perform depth estimation
     with torch.no_grad():
         outputs = model(**encoding)
         predicted_depth = outputs.predicted_depth

-    # interpolate to original size
+    # Interpolate depth to match the image size
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
-        size=image.size[::-1],
+        size=(image.height, image.width),
         mode="bicubic",
-        align_corners=False,
+        align_corners=True,
     ).squeeze()
-    output = prediction.cpu().numpy()
-    depth_image = (output * 255 / np.max(output)).astype("uint8")
+
+    # Normalize the depth image to 8-bit
+    prediction = prediction.cpu().numpy()
+    depth_min, depth_max = prediction.min(), prediction.max()
+    depth_image = ((prediction - depth_min) / (depth_max - depth_min) * 255).astype("uint8")
+
     try:
-        gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
-        img = Image.fromarray(depth_image)
-        return [img, gltf_path, gltf_path]
-    except Exception as e:
-        gltf_path = create_3d_obj(np.array(image), depth_image, image_path, depth=8)
-        img = Image.fromarray(depth_image)
-        return [img, gltf_path, gltf_path]
-    except:
-        print("Error reconstructing 3D model")
-        raise Exception("Error reconstructing 3D model")
-
-
-def create_3d_obj(rgb_image, depth_image, image_path, depth=10):
+        gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=10, z_scale=z_scale)
+    except Exception:
+        gltf_path = create_3d_obj(np.array(image), prediction, image_path, depth=8, z_scale=z_scale)
+
+    img = Image.fromarray(depth_image)
+    return [img, gltf_path, gltf_path]
+
+
+def create_3d_obj(rgb_image, raw_depth, image_path, depth=10, z_scale=200):
+    """
+    Creates a 3D object from RGB and depth images.
+
+    Args:
+        rgb_image (np.ndarray): The RGB image as a NumPy array.
+        raw_depth (np.ndarray): The raw depth data.
+        image_path (Path): The path to the original image.
+        depth (int, optional): Depth parameter for Poisson reconstruction. Defaults to 10.
+        z_scale (float, optional): Scaling factor for the Z-axis. Defaults to 200.
+
+    Returns:
+        str: The file path to the saved GLTF model.
+    """
+    # Normalize the depth image
+    depth_image = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min()) * 255).astype("uint8")
     depth_o3d = o3d.geometry.Image(depth_image)
     image_o3d = o3d.geometry.Image(rgb_image)
+
+    # Create RGBD image
     rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
         image_o3d, depth_o3d, convert_rgb_to_intensity=False
     )
-    w = int(depth_image.shape[1])
-    h = int(depth_image.shape[0])

-    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
-    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)
+    height, width = depth_image.shape
+
+    # Define camera intrinsics
+    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(
+        width,
+        height,
+        fx=1.0,
+        fy=1.0,
+        cx=width / 2.0,
+        cy=height / 2.0,
+    )

+    # Generate point cloud from RGBD image
     pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

-    print("normals")
-    pcd.normals = o3d.utility.Vector3dVector(
-        np.zeros((1, 3))
-    )  # invalidate existing normals
+    # Scale the Z dimension
+    points = np.asarray(pcd.points)
+    depth_scaled = ((raw_depth - raw_depth.min()) / (raw_depth.max() - raw_depth.min())) * z_scale
+    z_values = depth_scaled.flatten()[:len(points)]
+    points[:, 2] *= z_values
+    pcd.points = o3d.utility.Vector3dVector(points)
+
+    # Estimate and orient normals
     pcd.estimate_normals(
         search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
     )
-    pcd.orient_normals_towards_camera_location(
-        camera_location=np.array([0.0, 0.0, 1000.0])
+    pcd.orient_normals_towards_camera_location(camera_location=np.array([0.0, 0.0, 2.0]))
+
+    # Apply transformations
+    pcd.transform([[1, 0, 0, 0],
+                   [0, -1, 0, 0],
+                   [0, 0, -1, 0],
+                   [0, 0, 0, 1]])
+    pcd.transform([[-1, 0, 0, 0],
+                   [0, 1, 0, 0],
+                   [0, 0, 1, 0],
+                   [0, 0, 0, 1]])
+
+    # Perform Poisson surface reconstruction
+    print(f"Running Poisson surface reconstruction with depth {depth}")
+    mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+        pcd, depth=depth, width=0, scale=1.1, linear_fit=True
     )
-    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
-    pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
-
-    print("run Poisson surface reconstruction")
-    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
-        mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
-            pcd, depth=depth, width=0, scale=1.1, linear_fit=True
-        )
+    print(f"Raw mesh vertices: {len(mesh_raw.vertices)}, triangles: {len(mesh_raw.triangles)}")

-    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
-    print(f"voxel_size = {voxel_size:e}")
+    # Simplify the mesh using vertex clustering
+    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / (max(width, height) * 0.8)
     mesh = mesh_raw.simplify_vertex_clustering(
         voxel_size=voxel_size,
         contraction=o3d.geometry.SimplificationContraction.Average,
     )
+    print(f"Simplified mesh vertices: {len(mesh.vertices)}, triangles: {len(mesh.triangles)}")

-    # vertices_to_remove = densities < np.quantile(densities, 0.001)
-    # mesh.remove_vertices_by_mask(vertices_to_remove)
+    # Crop the mesh to the bounding box of the point cloud
     bbox = pcd.get_axis_aligned_bounding_box()
     mesh_crop = mesh.crop(bbox)
-    gltf_path = f"./{image_path.stem}.gltf"
+
+    # Save the mesh as a GLTF file
+    gltf_path = f"./models/{image_path.stem}.gltf"
     o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
     return gltf_path


-title = "Demo: zero-shot depth estimation with DPT + 3D Point Cloud"
-description = "This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object."
+# Define Gradio interface components
+title = "Demo: Zero-Shot Depth Estimation with DPT + 3D Point Cloud"
+description = (
+    "This demo is a variation from the original "
+    "<a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. "
+    "It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object."
+)
+# Create Gradio sliders for resized_width and z_scale
+resized_width_slider = gr.Slider(
+    minimum=400,
+    maximum=1600,
+    step=16,
+    value=800,
+    label="Resized Width",
+    info="Adjust the width to which the input image is resized."
+)
+
+z_scale_slider = gr.Slider(
+    minimum=160,
+    maximum=1024,
+    step=16,
+    value=208,
+    label="Z-Scale",
+    info="Adjust the scaling factor for the Z-axis in the 3D model."
+)
 examples = [["examples/" + img] for img in os.listdir("examples/")]

 iface = gr.Interface(
     fn=process_image,
-    inputs=[gr.Image(type="filepath", label="Input Image")],
+    inputs=[
+        gr.Image(type="filepath", label="Input Image"),
+        resized_width_slider,
+        z_scale_slider
+    ],
     outputs=[
-        gr.Image(label="predicted depth", type="pil"),
-        gr.Model3D(label="3d mesh reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]),
-        gr.File(label="3d gLTF"),
+        gr.Image(label="Predicted Depth", type="pil"),
+        gr.Model3D(label="3D Mesh Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]),
+        gr.File(label="3D GLTF"),
     ],
     title=title,
     description=description,
     examples=examples,
     allow_flagging="never",
     cache_examples=False,
+    theme="Surn/Beeuty"
 )
-iface.launch(debug=True, show_api=False)
+
+if __name__ == "__main__":
+    iface.launch(debug=True, show_api=False, favicon_path="./favicon.ico")
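
For reference, a minimal sketch of exercising the updated pipeline outside Gradio (assumptions: the new app.py is importable as `app`, and the ./models directory that create_3d_obj writes into exists; the example image is one of those added in this commit):

    import os
    from app import process_image

    os.makedirs("models", exist_ok=True)  # create_3d_obj saves to ./models/<stem>.gltf
    depth_img, gltf_path, _ = process_image(
        "examples/canyon-terrain.png", resized_width=800, z_scale=208
    )
    print(f"Depth map: {depth_img.size}, mesh: {gltf_path}")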
examples/canyon-terrain.png ADDED

Git LFS Details

  • SHA256: c29f3a1c560c156372f513a9e24e2244815356c4142b1e19601d27b0c1e55110
  • Pointer size: 132 Bytes
  • Size of remote file: 2.63 MB
examples/fractal-islands.png ADDED

Git LFS Details

  • SHA256: f320bcbeb4a45e3a71c445b3d9eac86165f1c7b8c1bf9ea7cca2d73b2747ccaf
  • Pointer size: 132 Bytes
  • Size of remote file: 3.15 MB
examples/logo_hex.png ADDED

Git LFS Details

  • SHA256: 9c0f91c488296e7234f829effe6da9d997704fa9b4e95739af7049d8d91db72b
  • Pointer size: 131 Bytes
  • Size of remote file: 547 kB
favicon.ico ADDED
requirements.txt CHANGED
@@ -2,6 +2,6 @@ torch
 transformers
 numpy
 Pillow
-gradio==4.26.0
+gradio>=5.16.0
 jinja2
 open3d
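
A quick sanity check that an environment satisfies the relaxed Gradio pin (a sketch; assumes `packaging` is available, which Gradio itself depends on):

    import gradio
    from packaging.version import Version

    assert Version(gradio.__version__) >= Version("5.16.0"), gradio.__version__
    print(f"gradio {gradio.__version__} OK")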
web-ui.bat ADDED
@@ -0,0 +1,2 @@
+python311 app.py
+pause
+ pause