# Web-page header captured along with this file:
# repository "dpt-depth04", file app.py (1.67 kB).
# Last commit: b547b0c "Update app.py" by adpro (verified).
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
# Load the pretrained DPT preprocessor and depth-estimation model once at
# import time so every request reuses the same objects.
# NOTE(review): DPTFeatureExtractor is reported as deprecated in newer
# transformers releases in favour of DPTImageProcessor - confirm against the
# version this app is pinned to before upgrading.
_CHECKPOINT = "Intel/dpt-large"
feature_extractor = DPTFeatureExtractor.from_pretrained(_CHECKPOINT)
model = DPTForDepthEstimation.from_pretrained(_CHECKPOINT)
# Put the model in evaluation mode; it is only used for inference here.
model.eval()
def process_image(image):
    """Estimate a depth map for ``image`` and return it as a grayscale image.

    Args:
        image: Input ``PIL.Image.Image``. It is converted to RGB when it is
            in any other mode.

    Returns:
        A ``PIL.Image.Image`` built from a 2-D ``uint8`` array with the same
        width and height as the input, holding the predicted depth min-max
        scaled to the 0-255 range. A completely flat prediction yields an
        all-zero (black) image.
    """
    # Make sure the image is RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Encode the image into model inputs.
    encoding = feature_extractor(image, return_tensors="pt")

    # Predict depth; gradients are not needed for inference.
    with torch.no_grad():
        predicted_depth = model(**encoding).predicted_depth

    # Resize back to the original resolution. PIL reports (width, height)
    # while interpolate expects (height, width), hence the reversal.
    resized = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    )
    # Drop only the batch and channel axes. A bare squeeze() would also
    # collapse a height or width of 1 and hand PIL a 1-D array.
    depth = resized[0, 0].cpu().numpy()

    # Min-max normalise and convert to uint8.
    low = depth.min()
    span = depth.max() - low
    if span > 0:
        scaled = (depth - low) / span
    else:
        # Flat depth map: dividing by a zero range would produce NaNs, so
        # emit a uniform image instead.
        scaled = np.zeros_like(depth)
    formatted = (scaled * 255).astype("uint8")
    return Image.fromarray(formatted)
title = "Demo: zero-shot depth estimation with DPT"
description = "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."

# Wire process_image into an image-in / image-out web UI.
# gr.Image is used for both sides: the legacy gr.inputs.Image and
# gr.outputs.Image classes were deprecated in Gradio 3 and removed in
# Gradio 4, where referencing them fails at import time.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
)
# Start the web server for the demo.
iface.launch(debug=True)