Spaces:
Runtime error
Runtime error
jens
committed on
Commit
·
077fc91
1
Parent(s):
2eca80e
UI first try
Browse files- app.py +85 -45
- app_legacy.py +48 -0
- inference.py +20 -3
app.py
CHANGED
@@ -1,48 +1,88 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
-
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
3 |
-
import supervision as sv
|
4 |
-
from inference import DepthPredictor, SegmentPredictor
|
5 |
-
from utils import create_3d_obj, create_3d_pc, point_cloud
|
6 |
import numpy as np
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
def
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
3 |
import numpy as np
|
4 |
+
import cv2
|
5 |
+
from PIL import Image
|
6 |
+
import torch
|
7 |
+
from inference import SegmentPredictor
|
8 |
|
9 |
+
|
10 |
+
|
11 |
+
sam = SegmentPredictor() #service.get_sam(configs.model_type, configs.model_ckpt_path, configs.device)
|
12 |
+
red = (255,0,0)
|
13 |
+
blue = (0,0,255)
|
14 |
+
|
15 |
+
|
16 |
+
block = gr.Blocks()
|
17 |
+
with block:
|
18 |
+
# States
|
19 |
+
def point_coords_empty():
    """Return a new, empty list used as the initial point-coordinate state."""
    # A new list object is built on every call.
    return list()
|
21 |
+
def point_labels_empty():
    """Return a new, empty list used as the initial point-label state."""
    # A new list object is built on every call.
    return list()
|
23 |
+
raw_image = gr.Image(type='pil', visible=False)
|
24 |
+
point_coords = gr.State(point_coords_empty)
|
25 |
+
point_labels = gr.State(point_labels_empty)
|
26 |
+
masks = gr.State()
|
27 |
+
cutout_idx = gr.State(set())
|
28 |
+
|
29 |
+
# UI
|
30 |
+
with gr.Column():
|
31 |
+
with gr.Row():
|
32 |
+
input_image = gr.Image(label='Input', height=512, type='pil')
|
33 |
+
masks_annotated_image = gr.AnnotatedImage(label='Segments', height=512)
|
34 |
+
cutout_galary = gr.Gallery(label='Cutouts', object_fit='contain', height=512)
|
35 |
+
with gr.Row():
|
36 |
+
with gr.Column(scale=1):
|
37 |
+
point_label_radio = gr.Radio(label='Point Label', choices=[1,0], value=1)
|
38 |
+
reset_btn = gr.Button('Reset')
|
39 |
+
sam_sgmt_everything_btn = gr.Button('Segment Everything!', variant = 'primary')
|
40 |
+
sam_encode_btn = gr.Button('Encode', variant = 'primary')
|
41 |
+
sam_decode_btn = gr.Button('Predict using points!')
|
42 |
+
|
43 |
+
# components
|
44 |
+
components = {point_coords, point_labels, raw_image, masks, cutout_idx, input_image,
|
45 |
+
point_label_radio, reset_btn, sam_sgmt_everything_btn, sam_encode_btn,
|
46 |
+
sam_decode_btn, masks_annotated_image}
|
47 |
+
# event - init coords
|
48 |
+
def on_reset_btn_click(raw_image):
    """Undo all point selections and clear the displayed results.

    Args:
        raw_image: The unmodified image kept in the hidden ``raw_image``
            component (no point markers drawn on it).

    Returns:
        A 5-tuple with one value per wired output, in order: the clean image
        for ``input_image``, fresh empty lists for ``point_coords`` and
        ``point_labels``, ``None`` to blank ``masks_annotated_image`` and an
        empty list to empty ``cutout_galary``.
    """
    return raw_image, point_coords_empty(), point_labels_empty(), None, []

# The handler returns five values, so all five targets must be listed here.
# Previously only the first three were wired, which left stale segments and
# cutouts on screen after a reset.
reset_btn.click(
    on_reset_btn_click,
    [raw_image],
    [input_image, point_coords, point_labels, masks_annotated_image, cutout_galary],
    queue=False,
)
|
51 |
+
|
52 |
+
def on_input_image_upload(input_image):
    """Remember a newly uploaded image and clear the point selections.

    The image is only stored here; it is not encoded by this handler.

    Args:
        input_image: The image that was just uploaded.

    Returns:
        A 3-tuple with one value per output wired to the upload event
        (``raw_image``, ``point_coords``, ``point_labels``): the uploaded
        image followed by two empty lists.
    """
    # The original returned a fourth value (None) that had no matching output.
    # New list objects are created on each call so nothing is shared between
    # calls.
    return input_image, [], []
|
55 |
+
input_image.upload(on_input_image_upload, [input_image], [raw_image, point_coords, point_labels], queue=False)
|
56 |
+
|
57 |
+
# event - set coords
|
58 |
+
def on_input_image_select(input_image, point_coords, point_labels, point_label_radio, evt: gr.SelectData):
    """Record a clicked point and draw a marker for it on the input image.

    Args:
        input_image: The image currently shown in the input component.
        point_coords: List of ``[x, y]`` points selected so far (appended to).
        point_labels: List of labels selected so far (appended to).
        point_label_radio: Label chosen in the radio control for this click
            (presumably 1 = foreground, 0 = background per SAM's convention;
            confirm).
        evt: Gradio select event; ``evt.index`` holds the clicked ``(x, y)``.

    Returns:
        The image with the new marker drawn, plus the updated coordinate and
        label lists.
    """
    click_x, click_y = evt.index

    # Label 0 is drawn in `red`, any other label in `blue`.
    if point_label_radio == 0:
        marker_color = red
    else:
        marker_color = blue

    # Draw on a NumPy copy of the image; thickness -1 gives a filled circle.
    canvas = np.array(input_image)
    cv2.circle(canvas, (click_x, click_y), 5, marker_color, -1)
    marked_image = Image.fromarray(canvas)

    point_coords.append([click_x, click_y])
    point_labels.append(point_label_radio)
    return marked_image, point_coords, point_labels
|
67 |
+
input_image.select(on_input_image_select, [input_image, point_coords, point_labels, point_label_radio], [input_image, point_coords, point_labels], queue=False)
|
68 |
+
|
69 |
+
# event - inference
|
70 |
+
def on_click_sam_encode_btn(inputs):
    """Compute the SAM embedding for the stored image.

    Args:
        inputs: Dict mapping each input component to its current value
            (Gradio passes a dict because the event's inputs are a set).

    Returns:
        A dict of updates for the three wired outputs. It blanks the
        annotated image, the stored masks and the cutout index set, since
        results from a previously encoded image no longer apply.

    Raises:
        gr.Error: If no image has been uploaded yet.
    """
    image = inputs[raw_image]
    if image is None:
        # Surface a readable message in the UI instead of failing inside SAM.
        raise gr.Error('Please upload an image before encoding.')

    sam.encode(image)

    # The event is wired to three outputs, so the handler must return values
    # for them; the original returned None, which Gradio rejects.
    return {masks_annotated_image: None,
            masks: None,
            cutout_idx: set()}
|
73 |
+
|
74 |
+
def on_click_sam_dencode_btn(inputs):
    """Predict masks from the selected points and display them.

    Args:
        inputs: Dict mapping each input component to its current value
            (Gradio passes a dict because the event's inputs are a set).

    Returns:
        A dict of updates: the annotated image as ``(image, [(mask, label),
        ...])``, the raw masks for the ``masks`` state, and a reset
        ``cutout_idx`` set.

    Raises:
        gr.Error: If no points have been selected yet.
    """
    image = inputs[raw_image]
    coords = inputs[point_coords]
    labels = inputs[point_labels]
    if not coords:
        # An empty prompt gives SAM nothing to condition on.
        raise gr.Error('Select at least one point on the image first.')

    # cond_pred returns only the masks (no scores), so bind the result
    # directly. The original `generated_masks, _ = ...` unpacking raised
    # ValueError.
    generated_masks = sam.cond_pred(pts=np.array(coords), lbls=np.array(labels))

    annotated = (image, [(mask, f'Mask {i}') for i, mask in enumerate(generated_masks)])
    return {masks_annotated_image: annotated,
            masks: generated_masks,
            cutout_idx: set()}
|
81 |
+
sam_encode_btn.click(on_click_sam_encode_btn, components, [masks_annotated_image, masks, cutout_idx], queue=True)
|
82 |
+
sam_decode_btn.click(on_click_sam_dencode_btn, components, [masks_annotated_image, masks, cutout_idx], queue=True)
|
83 |
+
#sam_sgmt_everything_btn.click(on_sam_sgmt_everything_click, components, [masks_annotated_image, masks, cutout_idx], queue=True)
|
84 |
+
|
85 |
+
|
86 |
+
if __name__ == '__main__':
|
87 |
+
block.queue()
|
88 |
+
block.launch()
|
app_legacy.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
3 |
+
import supervision as sv
|
4 |
+
from inference import DepthPredictor, SegmentPredictor
|
5 |
+
from utils import create_3d_obj, create_3d_pc, point_cloud
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
def produce_depth_map(image):
    """Run depth estimation on ``image`` and return the predictor's result.

    A new ``DepthPredictor`` is constructed on every call.
    """
    return DepthPredictor().predict(image)
|
12 |
+
|
13 |
+
def produce_segmentation_map(image):
    """Segment the whole image with SAM and return the predictor's result.

    A new ``SegmentPredictor`` is constructed on every call.

    Args:
        image: The input image to segment.

    Returns:
        Whatever ``SegmentPredictor.segment_everything`` returns for the image.
    """
    segment_predictor = SegmentPredictor()
    # inference.SegmentPredictor no longer has `predict`; the automatic
    # mask-generation entry point is now named `segment_everything`.
    return segment_predictor.segment_everything(image)
|
17 |
+
|
18 |
+
def produce_3d_reconstruction(image):
    """Build a 3D object from ``image`` and its predicted depth.

    Depth is predicted with a newly constructed ``DepthPredictor``; the image
    (as a NumPy array) and the depth result are passed to ``create_3d_obj``
    with the output path ``./rgb.gltf``.

    Returns:
        The value returned by ``create_3d_obj``.
    """
    predicted_depth = DepthPredictor().predict(image)
    rgb_pixels = np.array(image)
    return create_3d_obj(rgb_pixels, predicted_depth, path='./rgb.gltf')
|
23 |
+
|
24 |
+
def produce_point_cloud(depth_map, segmentation_map):
    """Build a point cloud from a segmentation map and a depth map.

    The segmentation map is converted to a NumPy array before both inputs are
    handed to ``point_cloud``, whose result is returned unchanged.
    """
    segmentation_array = np.array(segmentation_map)
    return point_cloud(segmentation_array, depth_map)
|
26 |
+
|
27 |
+
def snap(image, depth_map, segmentation_map):
    """Run the requested analyses on ``image`` and collect the results.

    Args:
        image: The captured input image.
        depth_map: Truthy to compute the depth map and the 3D reconstruction.
        segmentation_map: Truthy to compute the segmentation map.

    Returns:
        ``[image, depth_result, sam_result, rgb_gltf_path, point_cloud_fig]``,
        where every result that was not requested is ``None``. The point
        cloud is produced only when both flags are truthy.
    """
    depth_result = None
    sam_result = None
    rgb_gltf_path = None
    point_cloud_fig = None

    if depth_map:
        depth_result = produce_depth_map(image)
    if segmentation_map:
        sam_result = produce_segmentation_map(image)
    if depth_map:
        rgb_gltf_path = produce_3d_reconstruction(image)
    if segmentation_map and depth_map:
        point_cloud_fig = produce_point_cloud(depth_result, sam_result)

    return [image, depth_result, sam_result, rgb_gltf_path, point_cloud_fig]
|
34 |
+
demo = gr.Interface(
|
35 |
+
snap,
|
36 |
+
inputs=[gr.Image(source="webcam", tool=None, label="Input Image", type="pil"),
|
37 |
+
"checkbox",
|
38 |
+
"checkbox"],
|
39 |
+
outputs=[gr.Image(label="RGB"),
|
40 |
+
gr.Image(label="predicted depth"),
|
41 |
+
gr.Image(label="predicted segmentation"),
|
42 |
+
gr.Model3D(label="3D mesh reconstruction - RGB",
|
43 |
+
clear_color=[1.0, 1.0, 1.0, 1.0]),
|
44 |
+
gr.Plot()]
|
45 |
+
)
|
46 |
+
|
47 |
+
if __name__ == "__main__":
|
48 |
+
demo.launch()
|
inference.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from transformers import DPTImageProcessor, DPTForDepthEstimation
|
2 |
-
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
|
3 |
import gradio as gr
|
4 |
import supervision as sv
|
5 |
import torch
|
@@ -13,7 +13,7 @@ class DepthPredictor:
|
|
13 |
self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
|
14 |
self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
|
15 |
self.model.eval()
|
16 |
-
|
17 |
def predict(self, image):
|
18 |
# prepare image for the model
|
19 |
encoding = self.feature_extractor(image, return_tensors="pt")
|
@@ -44,9 +44,26 @@ class SegmentPredictor:
|
|
44 |
MODEL_TYPE = "vit_b"
|
45 |
checkpoint = "sam_vit_b_01ec64.pth"
|
46 |
sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
|
|
|
|
|
|
|
47 |
self.mask_generator = SamAutomaticMaskGenerator(sam)
|
|
|
48 |
|
49 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
image = np.array(image)
|
51 |
sam_result = self.mask_generator.generate(image)
|
52 |
mask_annotator = sv.MaskAnnotator()
|
|
|
1 |
from transformers import DPTImageProcessor, DPTForDepthEstimation
|
2 |
+
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry, SamPredictor
|
3 |
import gradio as gr
|
4 |
import supervision as sv
|
5 |
import torch
|
|
|
13 |
self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
|
14 |
self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
|
15 |
self.model.eval()
|
16 |
+
|
17 |
def predict(self, image):
|
18 |
# prepare image for the model
|
19 |
encoding = self.feature_extractor(image, return_tensors="pt")
|
|
|
44 |
MODEL_TYPE = "vit_b"
|
45 |
checkpoint = "sam_vit_b_01ec64.pth"
|
46 |
sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
|
47 |
+
# Select device
|
48 |
+
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
49 |
+
sam.to(device=self.device)
|
50 |
self.mask_generator = SamAutomaticMaskGenerator(sam)
|
51 |
+
self.conditioned_pred = SamPredictor(sam)
|
52 |
|
53 |
+
def encode(self, image):
    """Convert ``image`` to a NumPy array and set it on the point-prompt predictor.

    The array is passed to ``self.conditioned_pred.set_image``; nothing is
    returned.
    """
    pixels = np.array(image)
    self.conditioned_pred.set_image(pixels)
|
56 |
+
|
57 |
+
def cond_pred(self, pts, lbls):
    """Predict masks for the given point prompts.

    Args:
        pts: Point coordinates, forwarded as ``point_coords``.
        lbls: Point labels, forwarded as ``point_labels``.

    Returns:
        Only the first element (the masks) of the 3-tuple returned by
        ``self.conditioned_pred.predict``; the other two elements are
        discarded. ``multimask_output`` is always requested.
    """
    prediction = self.conditioned_pred.predict(
        point_coords=pts,
        point_labels=lbls,
        multimask_output=True,
    )
    masks, _unused_second, _unused_third = prediction
    return masks
|
64 |
+
|
65 |
+
|
66 |
+
def segment_everything(self, image):
|
67 |
image = np.array(image)
|
68 |
sam_result = self.mask_generator.generate(image)
|
69 |
mask_annotator = sv.MaskAnnotator()
|