dpt-depth04

Running

App Files Community

adpro committed on Jul 7

Commit

029b4b5

verified ·

1 Parent(s): c496f78

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -32

app.py CHANGED Viewed

@@ -1,53 +1,101 @@
 import gradio as gr
-from transformers import DPTFeatureExtractor, DPTForDepthEstimation
 import torch
 import numpy as np
-from PIL import Image
-# Load model và feature extractor
-feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
-model.eval()
-def process_image(image):
-    # Đảm bảo ảnh là RGB
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-    # Encode
     encoding = feature_extractor(image, return_tensors="pt")
-    # Dự đoán depth
     with torch.no_grad():
         outputs = model(**encoding)
         predicted_depth = outputs.predicted_depth
-    # Resize về kích thước ảnh gốc
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
         size=image.size[::-1],
         mode="bicubic",
-        align_corners=False
     ).squeeze()
-    # Chuẩn hóa và chuyển về ảnh uint8
     output = prediction.cpu().numpy()
-    output = (output - np.min(output)) / (np.max(output) - np.min(output))  # normalize
-    formatted = (output * 255).astype("uint8")
-    depth_img = Image.fromarray(formatted)
-    return depth_img
-    return result
-title = "Demo: zero-shot depth estimation with DPT"
-description = "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
-iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.inputs.Image(type="pil", label="Input Image"),
-    outputs=gr.outputs.Image(type="pil", label="Predicted Depth"),
-    title=title,
-    description=description,
-)
-iface.launch(debug=True)

+from doctest import Example
 import gradio as gr
+from transformers import DPTImageProcessor, DPTForDepthEstimation
 import torch
 import numpy as np
+from PIL import Image, ImageOps
+from pathlib import Path
+import glob
+from autostereogram.converter import StereogramConverter
+from datetime import datetime
+import time
+import tempfile
+feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+stereo_converter = StereogramConverter()
+def process_image(image_path):
+    print("\n\n\n")
+    print("Processing image:", image_path)
+    last_time = time.time()
+    image_raw = Image.open(Path(image_path))
+    image = image_raw.resize(
+        (1280, int(1280 * image_raw.size[1] / image_raw.size[0])),
+        Image.Resampling.LANCZOS,
+    )
+    # prepare image for the model
     encoding = feature_extractor(image, return_tensors="pt")
+    # forward pass
     with torch.no_grad():
         outputs = model(**encoding)
         predicted_depth = outputs.predicted_depth
+    # interpolate to original size
     prediction = torch.nn.functional.interpolate(
         predicted_depth.unsqueeze(1),
         size=image.size[::-1],
         mode="bicubic",
+        align_corners=False,
     ).squeeze()
     output = prediction.cpu().numpy()
+    depth_image = (output * 255 / np.max(output)).astype("uint8")
+    depth_image_padded = np.array(
+        ImageOps.pad(Image.fromarray(depth_image), (1280, 720))
+    )
+    stereo_image = stereo_converter.convert_depth_to_stereogram_with_thread_pool(
+        depth_image_padded, False
+    ).astype(np.uint8)
+    stereo_image_pil = Image.fromarray(stereo_image).convert("RGB")
+    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
+        image_name = f.name
+        stereo_image_pil.save(image_name)
+    return [depth_image_padded, stereo_image, image_name]
+examples_images = [[f] for f in sorted(glob.glob("examples/*.jpg"))]
+with gr.Blocks() as blocks:
+    gr.Markdown(
+        """
+## Depth Image to Autostereogram (Magic Eye)
+This demo is a variation from the original [DPT Demo](https://huggingface.co/spaces/nielsr/dpt-depth-estimation).
+Zero-shot depth estimation from an image, then it uses [pystereogram](https://github.com/yxiao1996/pystereogram)
+to generate the autostereogram (Magic Eye)
+<base target="_blank">
+"""
+    )
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="filepath", label="Input Image")
+            button = gr.Button("Predict")
+        with gr.Column():
+            predicted_depth = gr.Image(label="Predicted Depth", type="pil")
+    with gr.Row():
+        autostereogram = gr.Image(label="Autostereogram", type="pil")
+    with gr.Row():
+        with gr.Column():
+            file_download = gr.File(label="Download Image")
+    with gr.Row():
+        gr.Examples(
+            examples=examples_images,
+            fn=process_image,
+            inputs=[input_image],
+            outputs=[predicted_depth, autostereogram, file_download],
+            cache_examples=True,
+        )
+    button.click(
+        fn=process_image,
+        inputs=[input_image],
+        outputs=[predicted_depth, autostereogram, file_download],
+    )
+blocks.launch(debug=True)