Spaces:
Runtime error
Runtime error
jens
commited on
Commit
·
077fc91
1
Parent(s):
2eca80e
UI first try
Browse files- app.py +85 -45
- app_legacy.py +48 -0
- inference.py +20 -3
app.py
CHANGED
|
@@ -1,48 +1,88 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
| 3 |
-
import supervision as sv
|
| 4 |
-
from inference import DepthPredictor, SegmentPredictor
|
| 5 |
-
from utils import create_3d_obj, create_3d_pc, point_cloud
|
| 6 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import torch
|
| 7 |
+
from inference import SegmentPredictor
|
| 8 |
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Single shared SAM wrapper for the whole app: `encode` hands the current
# image to it and `cond_pred` predicts masks against that image.
sam = SegmentPredictor()

# RGB colours of the click markers drawn on the input image.
red = (255, 0, 0)    # marker for points labelled 0
blue = (0, 0, 255)   # marker for points labelled 1


block = gr.Blocks()
with block:
    # States
    def point_coords_empty():
        """Return a new empty list for the clicked [x, y] coordinates."""
        return []

    def point_labels_empty():
        """Return a new empty list for the per-point labels."""
        return []

    # Hidden copy of the uploaded image, kept free of click markers.
    raw_image = gr.Image(type='pil', visible=False)
    point_coords = gr.State(point_coords_empty)
    point_labels = gr.State(point_labels_empty)
    masks = gr.State()
    cutout_idx = gr.State(set())

    # UI
    with gr.Column():
        with gr.Row():
            input_image = gr.Image(label='Input', height=512, type='pil')
            masks_annotated_image = gr.AnnotatedImage(label='Segments', height=512)
            cutout_gallery = gr.Gallery(label='Cutouts', object_fit='contain', height=512)
        with gr.Row():
            with gr.Column(scale=1):
                point_label_radio = gr.Radio(label='Point Label', choices=[1, 0], value=1)
                reset_btn = gr.Button('Reset')
                sam_sgmt_everything_btn = gr.Button('Segment Everything!', variant='primary')
                sam_encode_btn = gr.Button('Encode', variant='primary')
                sam_decode_btn = gr.Button('Predict using points!')

    # components
    # Passed as a *set* of inputs, so handlers receive one dict keyed by component.
    components = {point_coords, point_labels, raw_image, masks, cutout_idx, input_image,
                  point_label_radio, reset_btn, sam_sgmt_everything_btn, sam_encode_btn,
                  sam_decode_btn, masks_annotated_image}

    # event - init coords
    def on_reset_btn_click(raw_image):
        """Restore the unmarked image and discard every selected point.

        Returns one value per wired output: input_image, point_coords,
        point_labels.
        """
        return raw_image, point_coords_empty(), point_labels_empty()
    reset_btn.click(on_reset_btn_click, [raw_image], [input_image, point_coords, point_labels], queue=False)

    def on_input_image_upload(input_image):
        """Remember the pristine upload and start with an empty point selection.

        The image is not encoded here; encoding is triggered by the
        'Encode' button. Returns one value per wired output: raw_image,
        point_coords, point_labels.
        """
        return input_image, point_coords_empty(), point_labels_empty()
    input_image.upload(on_input_image_upload, [input_image], [raw_image, point_coords, point_labels], queue=False)

    # event - set coords
    def on_input_image_select(input_image, point_coords, point_labels, point_label_radio, evt: gr.SelectData):
        """Record a clicked point and draw its marker on the displayed image.

        The marker is red when the radio label is 0 and blue otherwise. The
        coordinate and label are appended to the session state lists.
        """
        x, y = evt.index
        color = red if point_label_radio == 0 else blue
        img = np.array(input_image)
        cv2.circle(img, (x, y), 5, color, -1)  # thickness -1 => filled disc
        img = Image.fromarray(img)
        point_coords.append([x, y])
        point_labels.append(point_label_radio)
        return img, point_coords, point_labels
    input_image.select(on_input_image_select, [input_image, point_coords, point_labels, point_label_radio], [input_image, point_coords, point_labels], queue=False)

    # event - inference
    def on_click_sam_encode_btn(inputs):
        """Hand the unmarked upload to the SAM predictor; updates no component."""
        image = inputs[raw_image]
        if image is None:
            raise gr.Error('Upload an image first.')
        sam.encode(image)

    def on_click_sam_decode_btn(inputs):
        """Predict masks for the selected points and show them as annotations.

        Returns a dict keyed by output component: the annotated image, the
        raw masks, and a cleared cut-out selection.
        """
        image = inputs[raw_image]
        coords = inputs[point_coords]
        labels = inputs[point_labels]
        if image is None:
            raise gr.Error('Upload an image first.')
        if not coords:
            raise gr.Error('Select at least one point on the image first.')
        # cond_pred returns only the masks, so there is nothing to unpack.
        generated_masks = sam.cond_pred(pts=np.array(coords), lbls=np.array(labels))
        annotated = (image, [(mask, f'Mask {i}') for i, mask in enumerate(generated_masks)])
        return {masks_annotated_image: annotated,
                masks: generated_masks,
                cutout_idx: set()}

    # Encoding produces no UI update, so it is wired without outputs.
    sam_encode_btn.click(on_click_sam_encode_btn, components, None, queue=True)
    sam_decode_btn.click(on_click_sam_decode_btn, components, [masks_annotated_image, masks, cutout_idx], queue=True)
    # TODO: wire sam_sgmt_everything_btn once an on_sam_sgmt_everything_click
    # handler exists (outputs: masks_annotated_image, masks, cutout_idx).


if __name__ == '__main__':
    block.queue()
    block.launch()
|
app_legacy.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
| 3 |
+
import supervision as sv
|
| 4 |
+
from inference import DepthPredictor, SegmentPredictor
|
| 5 |
+
from utils import create_3d_obj, create_3d_pc, point_cloud
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
def produce_depth_map(image):
    """Return the depth prediction for *image*.

    A new DepthPredictor is constructed on every call and its ``predict``
    result is returned unchanged.
    """
    return DepthPredictor().predict(image)
|
| 12 |
+
|
| 13 |
+
def produce_segmentation_map(image):
    """Return the automatic ("segment everything") result for *image*.

    A new SegmentPredictor is constructed on every call.
    """
    predictor = SegmentPredictor()
    # inference.py names the automatic-mask method `segment_everything`;
    # fall back to the legacy `predict` name so either version of the
    # predictor class works.
    # NOTE(review): drop the fallback once the old name is confirmed gone.
    segment = getattr(predictor, 'segment_everything', None)
    if segment is None:
        segment = predictor.predict
    return segment(image)
|
| 17 |
+
|
| 18 |
+
def produce_3d_reconstruction(image):
    """Build a 3D object from *image* and its predicted depth.

    Depth comes from a newly constructed DepthPredictor. The value returned
    by ``create_3d_obj`` (called with ``path='./rgb.gltf'``) is passed
    through unchanged.
    """
    depth = DepthPredictor().predict(image)
    rgb = np.array(image)
    return create_3d_obj(rgb, depth, path='./rgb.gltf')
|
| 23 |
+
|
| 24 |
+
def produce_point_cloud(depth_map, segmentation_map):
    """Return ``point_cloud`` applied to the segmentation (as an array) and depth map."""
    segmentation = np.array(segmentation_map)
    return point_cloud(segmentation, depth_map)
|
| 26 |
+
|
| 27 |
+
def snap(image, depth_map, segmentation_map):
    """Run the requested predictions on *image* for the Interface outputs.

    *depth_map* and *segmentation_map* are the two checkbox flags. Any
    result whose flag is off is returned as ``None``.

    Returns ``[image, depth, segmentation, gltf_path, point_cloud_figure]``.
    """
    want_depth = bool(depth_map)
    want_segments = bool(segmentation_map)

    depth_result = sam_result = rgb_gltf_path = point_cloud_fig = None

    if want_depth:
        depth_result = produce_depth_map(image)
    if want_segments:
        sam_result = produce_segmentation_map(image)
    if want_depth:
        # NOTE(review): this runs depth prediction a second time inside
        # produce_3d_reconstruction; consider reusing depth_result.
        rgb_gltf_path = produce_3d_reconstruction(image)
    if want_segments and want_depth:
        point_cloud_fig = produce_point_cloud(depth_result, sam_result)

    return [image, depth_result, sam_result, rgb_gltf_path, point_cloud_fig]
|
| 34 |
+
# Webcam snapshot plus two checkboxes in; the five values returned by `snap` out.
demo = gr.Interface(
    fn=snap,
    inputs=[
        gr.Image(source="webcam", tool=None, label="Input Image", type="pil"),
        "checkbox",  # depth_map flag
        "checkbox",  # segmentation_map flag
    ],
    outputs=[
        gr.Image(label="RGB"),
        gr.Image(label="predicted depth"),
        gr.Image(label="predicted segmentation"),
        gr.Model3D(
            label="3D mesh reconstruction - RGB",
            clear_color=[1.0, 1.0, 1.0, 1.0],
        ),
        gr.Plot(),
    ],
)

if __name__ == "__main__":
    demo.launch()
|
inference.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from transformers import DPTImageProcessor, DPTForDepthEstimation
|
| 2 |
-
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
| 3 |
import gradio as gr
|
| 4 |
import supervision as sv
|
| 5 |
import torch
|
|
@@ -13,7 +13,7 @@ class DepthPredictor:
|
|
| 13 |
self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
|
| 14 |
self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
|
| 15 |
self.model.eval()
|
| 16 |
-
|
| 17 |
def predict(self, image):
|
| 18 |
# prepare image for the model
|
| 19 |
encoding = self.feature_extractor(image, return_tensors="pt")
|
|
@@ -44,9 +44,26 @@ class SegmentPredictor:
|
|
| 44 |
MODEL_TYPE = "vit_b"
|
| 45 |
checkpoint = "sam_vit_b_01ec64.pth"
|
| 46 |
sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
|
|
|
|
|
|
|
|
|
|
| 47 |
self.mask_generator = SamAutomaticMaskGenerator(sam)
|
|
|
|
| 48 |
|
| 49 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
image = np.array(image)
|
| 51 |
sam_result = self.mask_generator.generate(image)
|
| 52 |
mask_annotator = sv.MaskAnnotator()
|
|
|
|
| 1 |
from transformers import DPTImageProcessor, DPTForDepthEstimation
|
| 2 |
+
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry, SamPredictor
|
| 3 |
import gradio as gr
|
| 4 |
import supervision as sv
|
| 5 |
import torch
|
|
|
|
| 13 |
self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
|
| 14 |
self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
|
| 15 |
self.model.eval()
|
| 16 |
+
|
| 17 |
def predict(self, image):
|
| 18 |
# prepare image for the model
|
| 19 |
encoding = self.feature_extractor(image, return_tensors="pt")
|
|
|
|
| 44 |
MODEL_TYPE = "vit_b"
|
| 45 |
checkpoint = "sam_vit_b_01ec64.pth"
|
| 46 |
sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
|
| 47 |
+
# Select device
|
| 48 |
+
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
| 49 |
+
sam.to(device=self.device)
|
| 50 |
self.mask_generator = SamAutomaticMaskGenerator(sam)
|
| 51 |
+
self.conditioned_pred = SamPredictor(sam)
|
| 52 |
|
| 53 |
+
def encode(self, image):
    """Convert *image* to a NumPy array and set it on the point-prompt predictor.

    Later ``cond_pred`` calls predict against the image set here.
    Returns nothing.
    """
    pixels = np.array(image)
    self.conditioned_pred.set_image(pixels)
|
| 56 |
+
|
| 57 |
+
def cond_pred(self, pts, lbls):
    """Predict masks for the point prompts *pts* with labels *lbls*.

    Calls the predictor with ``multimask_output=True`` and returns only the
    first element of its three-part result (the masks); the other two
    elements are discarded.
    """
    prediction = self.conditioned_pred.predict(
        point_coords=pts,
        point_labels=lbls,
        multimask_output=True,
    )
    masks, _second, _third = prediction
    return masks
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def segment_everything(self, image):
|
| 67 |
image = np.array(image)
|
| 68 |
sam_result = self.mask_generator.generate(image)
|
| 69 |
mask_annotator = sv.MaskAnnotator()
|