dpt-depth04

Running

App Files Files Community

adpro committed on Jul 7

Commit

3295ec4

verified ·

1 Parent(s): 7f500f1

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -23

app.py CHANGED Viewed

@@ -4,41 +4,46 @@ import torch
 import numpy as np
 from PIL import Image
-#torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
 feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
 def process_image(image):
-    # prepare image for the model
     encoding = feature_extractor(image, return_tensors="pt")
-    # forward pass
     with torch.no_grad():
-       outputs = model(**encoding)
-       predicted_depth = outputs.predicted_depth
-    # interpolate to original size
     prediction = torch.nn.functional.interpolate(
-                        predicted_depth.unsqueeze(1),
-                        size=image.size[::-1],
-                        mode="bicubic",
-                        align_corners=False,
-                 ).squeeze()
     output = prediction.cpu().numpy()
     formatted = (output * 255 / np.max(output)).astype('uint8')
     img = Image.fromarray(formatted)
-    return img
-title = "Demo: zero-shot depth estimation with DPT"
-description = "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
-iface = gr.Interface(fn=process_image,
-                     inputs=gr.inputs.Image(type="pil"),
-                     outputs=gr.outputs.Image(type="pil", label="predicted depth"),
-                     title=title,
-                     description=description,
-                     enable_queue=True)
-iface.launch(debug=True)

 import numpy as np
 from PIL import Image
+# Load model and feature extractor
 feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
 model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+model.eval()
 def process_image(image):
+    # Preprocess the input image for the model
     encoding = feature_extractor(image, return_tensors="pt")
+    # Forward pass through the model
     with torch.no_grad():
+        outputs = model(**encoding)
+        predicted_depth = outputs.predicted_depth
+    # Resize the output back to the original image size
     prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=image.size[::-1],  # (H, W)
+        mode="bicubic",
+        align_corners=False
+    ).squeeze()
+    # Convert to a uint8 image
     output = prediction.cpu().numpy()
     formatted = (output * 255 / np.max(output)).astype('uint8')
     img = Image.fromarray(formatted)
+    return img
+# Interface
+title = "Demo: Zero-shot Depth Estimation with DPT"
+description = "Intel's DPT: Dense Prediction Transformer for depth estimation from a single image."
+iface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil", label="Input Image"),
+    outputs=gr.Image(type="pil", label="Predicted Depth"),
+    title=title,
+    description=description,
+    allow_flagging="never"
+)
+iface.launch(debug=True)