Spaces:
Running
Running
File size: 1,478 Bytes
679bbb3 95d18d6 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 679bbb3 3295ec4 70d73db 3295ec4 cf99912 03a319e 3295ec4 679bbb3 3295ec4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
from pathlib import Path
# Build the DPT preprocessor and depth-estimation network once at import time;
# both are looked up by name from the same pretrained checkpoint.
_CHECKPOINT = "Intel/dpt-large"
feature_extractor = DPTFeatureExtractor.from_pretrained(_CHECKPOINT)
# eval() returns the module itself, so it can be chained onto the load call.
model = DPTForDepthEstimation.from_pretrained(_CHECKPOINT).eval()
def process_image(image):
    """Predict a depth map for ``image`` and return it as a grayscale image.

    Args:
        image: Input picture as a PIL image; its ``size`` attribute
            (width, height) determines the size of the returned map.

    Returns:
        A PIL image built from a 2-D ``uint8`` array with the same width and
        height as ``image``. Values are scaled so that the largest predicted
        value maps to 255; if the prediction has no positive value the
        result is all zeros.
    """
    # Preprocess the input image into PyTorch tensors for the model
    encoding = feature_extractor(image, return_tensors="pt")

    # Forward pass through the model without tracking gradients
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Resize the output back to the original image size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],  # PIL gives (W, H); interpolate wants (H, W)
        mode="bicubic",
        align_corners=False,
    )
    # Drop only the batch and channel axes. A bare squeeze() would also
    # collapse a height or width of 1 and yield a non-2-D array.
    # Assumes a single image per call, i.e. a batch of one.
    depth = prediction[0, 0].cpu().numpy()

    # Convert to a uint8 image
    peak = float(depth.max())
    if peak > 0:
        scaled = depth * 255 / peak
    else:
        # Avoid dividing by zero (NaN/inf) on an all-zero prediction.
        scaled = np.zeros_like(depth)
    # Bicubic interpolation can overshoot below zero; clip so such values do
    # not wrap around when cast to uint8.
    formatted = np.clip(scaled, 0, 255).astype("uint8")
    return Image.fromarray(formatted)
# Interface: wire process_image into a simple image-in / image-out web demo.
title = "Demo: Zero-shot Depth Estimation with DPT"
description = "Intel's DPT: Dense Prediction Transformer for depth estimation from a single image."
iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil", label="Input Image"),
    # process_image returns a PIL image, so the output must be an image
    # component. The previous value referenced `predicted_depth`, a name that
    # only exists inside process_image, and raised NameError at import.
    outputs=gr.outputs.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
    allow_flagging="never",
)
iface.launch(debug=True)
|