import gradio as gr
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
import numpy as np
from PIL import Image

# Load the model and feature extractor
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
model.eval()

def process_image(image):
    # Ensure the image is RGB
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Encode the image into model inputs
    encoding = feature_extractor(image, return_tensors="pt")

    # Predict depth
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # Resize the prediction back to the original image size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

    # Normalize to [0, 1] and convert to a uint8 grayscale image
    output = prediction.cpu().numpy()
    output = (output - np.min(output)) / (np.max(output) - np.min(output))
    formatted = (output * 255).astype("uint8")
    depth_img = Image.fromarray(formatted)

    return depth_img

title = "Demo: zero-shot depth estimation with DPT"
description = (
    "Demo for Intel's DPT, a Dense Prediction Transformer for state-of-the-art "
    "dense prediction tasks such as semantic segmentation and depth estimation."
)

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Input Image"),
    outputs=gr.Image(type="pil", label="Predicted Depth"),
    title=title,
    description=description,
)
iface.launch(debug=True)
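
# --- Optional: quick sanity check without the Gradio UI ---
# A minimal sketch, assuming a local test image at "example.jpg" (a placeholder
# path, not part of the original demo). Run this instead of iface.launch() to
# verify the depth pipeline end to end and save the normalized depth map as a
# grayscale PNG:
#
#   test_image = Image.open("example.jpg")
#   depth_map = process_image(test_image)
#   depth_map.save("depth.png")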