File size: 2,357 Bytes
5c0b534
9b4ee8f
 
6b8e3c4
a979122
7598e8a
5c0b534
aeca07b
6b8e3c4
9780d7b
aeca07b
 
 
c1883e2
 
aeca07b
 
 
 
 
 
 
c1883e2
aeca07b
 
 
371a984
 
 
 
 
 
fd2c88a
 
 
5c0b534
371a984
5c0b534
fd2c88a
 
 
 
 
 
 
 
 
 
371a984
fd2c88a
 
 
5c0b534
 
371a984
 
5c0b534
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
import supervision as sv
from inference import DepthPredictor, SegmentPredictor
from utils import create_3d_obj, create_3d_pc, point_cloud
import numpy as np

def produce_depth_map(image):
    """Run monocular depth estimation on *image* and return the prediction."""
    predictor = DepthPredictor()
    return predictor.predict(image)

def produce_segmentation_map(image):
    """Run SAM-based segmentation on *image* and return the prediction."""
    predictor = SegmentPredictor()
    return predictor.predict(image)

def produce_3d_reconstruction(image):
    """Predict depth for *image* and export an RGB-textured 3D mesh.

    Returns the path of the glTF file written to './rgb.gltf'.
    """
    predictor = DepthPredictor()
    depth = predictor.predict(image)
    return create_3d_obj(np.array(image), depth, path='./rgb.gltf')

def produce_point_cloud(depth_map, segmentation_map):
    """Build a point-cloud figure from a depth map and a segmentation map."""
    seg_array = np.array(segmentation_map)
    return point_cloud(seg_array, depth_map)

def snap(image, depth_map, segmentation_map, video):
    """Process one webcam frame according to the requested toggles.

    Parameters
    ----------
    image : PIL.Image
        Input RGB frame from the webcam.
    depth_map : bool-like
        When truthy, compute the depth map and the 3D mesh reconstruction.
    segmentation_map : bool-like
        When truthy, compute the SAM segmentation map.
    video :
        Webcam video input; currently unused (placeholder below).

    Returns
    -------
    list
        ``[image, depth, segmentation, gltf_path, point_cloud_figure]``;
        any result that was not requested is ``None``.
    """
    depth_result = produce_depth_map(image) if depth_map else None
    sam_result = produce_segmentation_map(image) if segmentation_map else None
    rgb_gltf_path = produce_3d_reconstruction(image) if depth_map else None
    # BUG FIX: the point cloud needs BOTH a depth result and a segmentation
    # result. Previously it was attempted whenever segmentation alone was
    # requested, passing depth_result=None into produce_point_cloud().
    point_cloud_fig = (
        produce_point_cloud(depth_result, sam_result)
        if (segmentation_map and depth_result is not None)
        else None
    )

    if video:
        # Placeholder: video processing not implemented yet.
        pass

    return [image, depth_result, sam_result, rgb_gltf_path, point_cloud_fig]

# --- Interface inputs -------------------------------------------------------
# NOTE(review): gr.Image(source=..., tool=...) and gr.Video(source=...) are
# Gradio 3.x keyword arguments that were removed in Gradio 4.x — confirm the
# pinned gradio version before upgrading.
image_input = gr.Image(source="webcam", tool=None, label="Input Image", type="pil")
# NOTE(review): Buttons are used here as boolean inputs to gr.Interface;
# gr.Checkbox is the usual on/off toggle component — verify these actually
# deliver True/False values to snap() in the deployed gradio version.
depth_map_button = gr.Button(label="Produce Depth Map", value=False)
segmentation_map_button = gr.Button(label="Produce Segmentation Map", value=False)
video_input = gr.Video(source="webcam")

# --- Interface outputs ------------------------------------------------------
# One output slot per element of the list returned by snap(), in order.
output_image = gr.Image(label="RGB")
output_depth_map = gr.Image(label="Predicted Depth")
output_segmentation_map = gr.Image(label="Predicted Segmentation")
output_3d_reconstruction = gr.Model3D(label="3D mesh reconstruction - RGB", clear_color=[1.0, 1.0, 1.0, 1.0])
output_point_cloud = gr.Plot(label="Point Cloud")

# --- Interface wiring -------------------------------------------------------
# inputs map positionally to snap(image, depth_map, segmentation_map, video);
# outputs map positionally to snap()'s returned 5-element list.
demo = gr.Interface(
    snap,
    inputs=[image_input, depth_map_button, segmentation_map_button, video_input],
    outputs=[output_image, output_depth_map, output_segmentation_map, output_3d_reconstruction, output_point_cloud]
)

# Launch the Gradio app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()