Spaces:

gradio
/

depth_estimation_main

Build error

App Files Files Community

aliabd HF Staff commited on Nov 20, 2022

Commit

ad2bf54

1 Parent(s): e88437a

Upload with huggingface_hub

Browse files

Files changed (2) hide show

requirements.txt +1 -1
run.ipynb +1 -0

requirements.txt CHANGED Viewed

@@ -4,4 +4,4 @@ numpy
 Pillow
 jinja2
 open3d
-https://gradio-main-build.s3.amazonaws.com/8edc91ff4b45981ad3e96288b404f9143115b178/gradio-3.10.1-py3-none-any.whl

 Pillow
 jinja2
 open3d
+https://gradio-main-build.s3.amazonaws.com/4f24e146dac42a85de708d147f488fdf418d4a7f/gradio-3.10.1-py3-none-any.whl

run.ipynb ADDED Viewed

	@@ -0,0 +1 @@

+ {"cells": [{"cell_type": "markdown", "id": 302934307671667531413257853548643485645, "metadata": {}, "source": ["# Gradio Demo: depth_estimation\n", "### A demo for predicting the depth of an image and generating a 3D model of it.\n", " "]}, {"cell_type": "code", "execution_count": null, "id": 272996653310673477252411125948039410165, "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch git+https://github.com/nielsrogge/transformers.git@add_dpt_redesign#egg=transformers numpy Pillow jinja2 open3d"]}, {"cell_type": "code", "execution_count": null, "id": 288918539441861185822528903084949547379, "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('examples')\n", "!wget -q -O examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/depth_estimation/packages.txt"]}, {"cell_type": "code", "execution_count": null, "id": 44380577570523278879349135829904343037, "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import DPTFeatureExtractor, DPTForDepthEstimation\n", "import torch\n", "import numpy as np\n", "from PIL import Image\n", "import open3d as o3d\n", "from pathlib import Path\n", "import os\n", "\n", "feature_extractor = DPTFeatureExtractor.from_pretrained(\"Intel/dpt-large\")\n", "model = DPTForDepthEstimation.from_pretrained(\"Intel/dpt-large\")\n", "\n", "def process_image(image_path):\n", " image_path = Path(image_path)\n", " image_raw = Image.open(image_path)\n", " image = image_raw.resize(\n", " (800, int(800 * image_raw.size[1] / image_raw.size[0])),\n", " Image.Resampling.LANCZOS)\n", "\n", " # prepare image for the model\n", " encoding = feature_extractor(image, return_tensors=\"pt\")\n", "\n", " # forward pass\n", " with torch.no_grad():\n", " outputs = model(**encoding)\n", " predicted_depth = outputs.predicted_depth\n", "\n", " # interpolate to original size\n", " prediction = torch.nn.functional.interpolate(\n", " predicted_depth.unsqueeze(1),\n", " size=image.size[::-1],\n", " mode=\"bicubic\",\n", " align_corners=False,\n", " ).squeeze()\n", " output = prediction.cpu().numpy()\n", " depth_image = (output * 255 / np.max(output)).astype('uint8')\n", " try:\n", " gltf_path = create_3d_obj(np.array(image), depth_image, image_path)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except Exception as e:\n", " gltf_path = create_3d_obj(\n", " np.array(image), depth_image, image_path, depth=8)\n", " img = Image.fromarray(depth_image)\n", " return [img, gltf_path, gltf_path]\n", " except:\n", " print(\"Error reconstructing 3D model\")\n", " raise Exception(\"Error reconstructing 3D model\")\n", "\n", "\n", "def create_3d_obj(rgb_image, depth_image, image_path, depth=10):\n", " depth_o3d = o3d.geometry.Image(depth_image)\n", " image_o3d = o3d.geometry.Image(rgb_image)\n", " rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(\n", " image_o3d, depth_o3d, convert_rgb_to_intensity=False)\n", " w = int(depth_image.shape[1])\n", " h = int(depth_image.shape[0])\n", "\n", " camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()\n", " camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)\n", "\n", " pcd = o3d.geometry.PointCloud.create_from_rgbd_image(\n", " rgbd_image, camera_intrinsic)\n", "\n", " print('normals')\n", " pcd.normals = o3d.utility.Vector3dVector(\n", " np.zeros((1, 3))) # invalidate existing normals\n", " pcd.estimate_normals(\n", " search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))\n", " pcd.orient_normals_towards_camera_location(\n", " camera_location=np.array([0., 0., 1000.]))\n", " pcd.transform([[1, 0, 0, 0],\n", " [0, -1, 0, 0],\n", " [0, 0, -1, 0],\n", " [0, 0, 0, 1]])\n", " pcd.transform([[-1, 0, 0, 0],\n", " [0, 1, 0, 0],\n", " [0, 0, 1, 0],\n", " [0, 0, 0, 1]])\n", "\n", " print('run Poisson surface reconstruction')\n", " with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:\n", " mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(\n", " pcd, depth=depth, width=0, scale=1.1, linear_fit=True)\n", "\n", " voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256\n", " print(f'voxel_size = {voxel_size:e}')\n", " mesh = mesh_raw.simplify_vertex_clustering(\n", " voxel_size=voxel_size,\n", " contraction=o3d.geometry.SimplificationContraction.Average)\n", "\n", " # vertices_to_remove = densities < np.quantile(densities, 0.001)\n", " # mesh.remove_vertices_by_mask(vertices_to_remove)\n", " bbox = pcd.get_axis_aligned_bounding_box()\n", " mesh_crop = mesh.crop(bbox)\n", " gltf_path = f'./{image_path.stem}.gltf'\n", " o3d.io.write_triangle_mesh(\n", " gltf_path, mesh_crop, write_triangle_uvs=True)\n", " return gltf_path\n", "\n", "title = \"Demo: zero-shot depth estimation with DPT + 3D Point Cloud\"\n", "description = \"This demo is a variation from the original <a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object.\"\n", "examples = [[\"examples/1-jonathan-borba-CgWTqYxHEkg-unsplash.jpg\"]]\n", "\n", "iface = gr.Interface(fn=process_image,\n", " inputs=[gr.Image(\n", " type=\"filepath\", label=\"Input Image\")],\n", " outputs=[gr.Image(label=\"predicted depth\", type=\"pil\"),\n", " gr.Model3D(label=\"3d mesh reconstruction\", clear_color=[\n", " 1.0, 1.0, 1.0, 1.0]),\n", " gr.File(label=\"3d gLTF\")],\n", " title=title,\n", " description=description,\n", " examples=examples,\n", " allow_flagging=\"never\",\n", " cache_examples=False)\n", "\n", "iface.launch(debug=True, enable_queue=False)"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}