import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image
# Load the DPT model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()
def process_image(image):
    # Ensure the image is RGB
    if image.mode != "RGB":
        image = image.convert("RGB")
    # Encode the image for the model
    encoding = feature_extractor(image, return_tensors="pt")
    # Predict depth
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth
    # Resize back to the original image size
    # (PIL's image.size is (width, height); interpolate expects (height, width))
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    # Normalize to [0, 1] and convert to a uint8 image
    output = prediction.cpu().numpy()
    output = (output - np.min(output)) / (np.max(output) - np.min(output))
    formatted = (output * 255).astype("uint8")
    depth_img = Image.fromarray(formatted)
    return depth_img
title = "Demo: zero-shot depth estimation with DPT"
description = "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
)
iface.launch(debug=True)
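# Hedged usage sketch (an assumption, not part of the original app): you can
# exercise process_image directly, without launching the Gradio UI.
# "example.jpg" and "depth.png" are illustrative placeholder filenames.
#
#     depth = process_image(Image.open("example.jpg"))
#     depth.save("depth.png")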