import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
# Load the DPT model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()
def process_image(image):
    # Ensure the image is RGB
    if image.mode != "RGB":
        image = image.convert("RGB")
    # Encode the image for the model
    encoding = feature_extractor(image, return_tensors="pt")
    # Predict depth
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth
    # Resize back to the original image size
    # (PIL's image.size is (width, height); interpolate expects (height, width))
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    # Normalize to [0, 1] and convert to a uint8 image
    output = prediction.cpu().numpy()
    output = (output - np.min(output)) / (np.max(output) - np.min(output))
    formatted = (output * 255).astype("uint8")
    depth_img = Image.fromarray(formatted)
    return depth_img
title = "Demo: zero-shot depth estimation with DPT"
description = "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
)
iface.launch(debug=True)
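# Hedged usage sketch (an assumption, not part of the original app): you can
# exercise process_image directly, without launching the Gradio UI.
# "example.jpg" and "depth.png" are illustrative placeholder filenames.
#
#     depth = process_image(Image.open("example.jpg"))
#     depth.save("depth.png")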