# hackathon_depth_segment / inference.py
from transformers import DPTImageProcessor, DPTForDepthEstimation
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
import supervision as sv
import torch
import numpy as np
from PIL import Image

class DepthPredictor:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # The image processor runs on the CPU and has no .to(); only the model
        # is moved to the device.
        self.processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large").to(self.device)
        self.model.eval()

    def predict(self, image):
        # Prepare the image for the model and move the input tensors to the device.
        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth
        # Interpolate the low-resolution depth prediction back to the input size.
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image.size[::-1],  # PIL size is (width, height); interpolate expects (height, width)
            mode="bicubic",
            align_corners=False,
        )
        # Normalize to the 0-255 range and return a grayscale PIL image.
        output = prediction.squeeze().cpu().numpy()
        formatted = (output * 255 / np.max(output)).astype("uint8")
        return Image.fromarray(formatted)
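
# A minimal usage sketch, not part of the original file; "room.jpg" is a
# placeholder path assumed for illustration:
#
#     predictor = DepthPredictor()
#     depth = predictor.predict(Image.open("room.jpg").convert("RGB"))
#     depth.save("room_depth.png")  # normalized 8-bit grayscale depth map
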
class sam_inference:
    def __init__(self):
        MODEL_TYPE = "vit_b"
        checkpoint = "sam_vit_b_01ec64.pth"
        # Load the Segment Anything ViT-B weights from a local checkpoint file.
        sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
        self.mask_generator = SamAutomaticMaskGenerator(sam)

    def predict(self, image):
        # `image` must be an RGB uint8 numpy array of shape (H, W, 3).
        sam_result = self.mask_generator.generate(image)
        # Convert SAM's raw masks to detections and overlay them on a copy of the input.
        mask_annotator = sv.MaskAnnotator()
        detections = sv.Detections.from_sam(sam_result=sam_result)
        annotated_image = mask_annotator.annotate(scene=image.copy(), detections=detections)
        return [annotated_image]
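
# A minimal end-to-end sketch, not part of the original file: run both models
# on a local image. "example.jpg" is a placeholder path, and the
# sam_vit_b_01ec64.pth checkpoint must already be downloaded for sam_inference
# to load.
if __name__ == "__main__":
    image = Image.open("example.jpg").convert("RGB")

    depth = DepthPredictor().predict(image)
    depth.save("depth.png")

    # SamAutomaticMaskGenerator expects an RGB uint8 numpy array.
    annotated = sam_inference().predict(np.array(image))[0]
    Image.fromarray(annotated).save("segmented.png")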