dpt-depth04

Running

File size: 1,478 Bytes

679bbb3
 
 
 
 
95d18d6
679bbb3
3295ec4
679bbb3
 
3295ec4
679bbb3
 
3295ec4
679bbb3
 
3295ec4
679bbb3
3295ec4
 
679bbb3
3295ec4
679bbb3
3295ec4
 
 
 
 
 
 
679bbb3
 
 
3295ec4
 
 
 
 
70d73db
3295ec4
 
cf99912
03a319e
3295ec4
 
 
 
679bbb3
3295ec4

import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
from pathlib import Path

# The preprocessor and the network are both loaded from the same checkpoint.
_CHECKPOINT = "Intel/dpt-large"

# Preprocessor applied to each input image before it is fed to the model.
feature_extractor = DPTFeatureExtractor.from_pretrained(_CHECKPOINT)

# Depth-estimation network, put into evaluation mode right after loading.
model = DPTForDepthEstimation.from_pretrained(_CHECKPOINT).eval()

def process_image(image):
    """Estimate a depth map for ``image`` and return it as a grayscale image.

    Args:
        image: Input picture as a ``PIL.Image.Image``.

    Returns:
        An 8-bit single-channel ``PIL.Image.Image`` with the same width and
        height as the input. Values are scaled so the largest predicted
        depth maps to 255; an all-zero prediction yields an all-black image.
    """
    # NOTE(review): the checkpoint presumably expects 3-channel RGB input;
    # converting here keeps RGBA / grayscale / palette uploads from reaching
    # the extractor in a layout it may not handle.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Preprocess the input image into model tensors.
    encoding = feature_extractor(image, return_tensors="pt")

    # Forward pass; gradients are not needed for inference.
    with torch.no_grad():
        predicted_depth = model(**encoding).predicted_depth

    # Resize the prediction back to the original image size.
    # PIL reports (W, H) while interpolate expects (H, W).
    width, height = image.size
    resized = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),  # interpolate needs a channel axis
        size=(height, width),
        mode="bicubic",
        align_corners=False,
    )

    # Index away the batch and channel axes explicitly: a bare squeeze()
    # would also drop a height or width of 1 and change the output shape.
    depth = resized[0, 0].cpu().numpy()

    # Convert to uint8. Guard the division so an all-zero (or NaN) peak does
    # not produce NaN/inf, and clip because bicubic resampling can overshoot
    # below zero, which would wrap around when cast to uint8.
    peak = depth.max()
    if peak > 0:
        scaled = np.clip(depth * 255 / peak, 0, 255)
    else:
        scaled = np.zeros_like(depth)

    return Image.fromarray(scaled.astype("uint8"))

# Interface: a single image goes in, the rendered depth map comes out.
title = "Demo: Zero-shot Depth Estimation with DPT"
description = "Intel's DPT: Dense Prediction Transformer for depth estimation from a single image."

iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil", label="Input Image"),
    # process_image returns a PIL image, so the output must be an image
    # component; `predicted_depth` is only a local inside process_image and
    # is not defined at module level.
    outputs=gr.outputs.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
    allow_flagging="never",
)

# debug=True keeps the process attached and prints errors to the console.
iface.launch(debug=True)