File size: 3,582 Bytes
38dbec8
 
 
 
eecf990
38dbec8
eecf990
 
38dbec8
 
eecf990
a399d55
e2ccc8a
 
38dbec8
eecf990
e2ccc8a
 
 
 
 
38dbec8
eecf990
e2ccc8a
eecf990
e2ccc8a
 
 
eecf990
 
38dbec8
eecf990
 
 
 
 
38dbec8
 
eecf990
38dbec8
 
 
 
 
 
 
 
 
 
 
 
 
eecf990
 
 
38dbec8
 
 
 
eecf990
 
d1dfe56
 
eecf990
d1dfe56
 
eecf990
 
 
 
 
 
 
 
 
38dbec8
eecf990
38dbec8
 
eecf990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1dfe56
eecf990
 
 
 
 
 
 
d1dfe56
 
 
 
 
 
 
 
eecf990
d1dfe56
eecf990
38dbec8
eecf990
d1dfe56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import tempfile
from contextlib import nullcontext
from typing import Any

import gradio as gr
import numpy as np
import torch
import trimesh
from PIL import Image
from transparent_background import Remover

# Install the local `texture_baker` and `uv_unwrapper` packages (with
# USE_CUDA=1 set in the command's environment), then import SPAR3D.
# NOTE(review): presumably spar3d needs these packages at import time, which
# would be why the install runs before the imports below -- confirm.
# NOTE: the exit status returned by os.system is discarded, so a failed
# install is not detected at this point.
os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
import spar3d.utils as spar3d_utils
from spar3d.system import SPAR3D

# Conditioning-view settings shared by the camera setup and batch creation.
COND_WIDTH, COND_HEIGHT = 512, 512  # size the input image is resized to
COND_DISTANCE = 2.2  # passed to default_cond_c2w()
COND_FOVY = 0.591627  # field of view in radians (fed to create_intrinsic_from_fov_rad)
BACKGROUND_COLOR = [0.5] * 3  # RGB blended in where the mask is 0

# One-time setup at import: pick the device, create the background remover,
# and load the pretrained SPAR3D weights.
device = spar3d_utils.get_device()
bg_remover = Remover()

model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
model = model.eval()
model = model.to(device)

# Fixed conditioning camera: camera-to-world pose plus intrinsics (raw and
# normalized) derived from the vertical FOV and the conditioning image size.
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
    COND_FOVY,
    COND_HEIGHT,
    COND_WIDTH,
)

def create_batch(input_image: Image.Image) -> dict[str, Any]:
    """Build the single-item model input batch from a PIL image.

    The image is resized to ``COND_WIDTH`` x ``COND_HEIGHT``, scaled to
    ``[0, 1]``, and its alpha channel is used as the foreground mask. The RGB
    channels are blended over ``BACKGROUND_COLOR`` using that mask.

    Args:
        input_image: Image to condition on. Images that are not already RGBA
            are converted to RGBA first, so an image without transparency is
            treated as fully opaque instead of having its last colour channel
            misread as a mask.

    Returns:
        A dict with keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
        ``intrinsic_cond`` and ``intrinsic_normed_cond``; every value has a
        leading batch dimension of size 1 added via ``unsqueeze(0)``.
    """
    # The mask below is read from the last channel, so guarantee it is alpha.
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")

    resized = input_image.resize((COND_WIDTH, COND_HEIGHT))
    img_cond = (
        torch.from_numpy(np.asarray(resized).astype(np.float32) / 255.0)
        .float()
        .clip(0, 1)
    )

    # (H, W, 1) alpha mask; the slice keeps the channel axis for broadcasting.
    mask_cond = img_cond[:, :, -1:]
    # mask == 0 -> background colour, mask == 1 -> original RGB.
    rgb_cond = torch.lerp(
        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
    )

    return {
        "rgb_cond": rgb_cond.unsqueeze(0),
        "mask_cond": mask_cond.unsqueeze(0),
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }

def process_image(image_path: str) -> str:
    """Convert an uploaded image into a GLB mesh file.

    The image is opened, passed through the background remover when its mode
    is not RGBA, cropped around the foreground, and fed to the SPAR3D model.
    The first generated mesh is exported to a temporary ``.glb`` file.

    Args:
        image_path: Filesystem path of the uploaded image.

    Returns:
        Path of the generated GLB file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no image was supplied or any processing step fails. The
            output component is a file, so returning the error text (as a
            plain string) would be misinterpreted as a file path; raising
            ``gr.Error`` surfaces the message in the UI instead.
    """
    if not image_path:
        raise gr.Error("Please upload an image first.")

    try:
        # Load image
        input_image = Image.open(image_path)

        # Remove background if the image has no alpha channel yet
        if input_image.mode != "RGBA":
            input_image = bg_remover.process(input_image.convert("RGB"))

        # Auto crop
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,  # Default padding ratio
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False,
        )

        # Prepare batch
        batch = create_batch(input_image)
        batch = {k: v.to(device) for k, v in batch.items()}

        # bfloat16 autocast only on CUDA; elsewhere use a no-op context.
        # device_type is the literal "cuda" so a device string such as
        # "cuda:0" is also accepted.
        if "cuda" in device:
            autocast_ctx = torch.autocast(device_type="cuda", dtype=torch.bfloat16)
        else:
            autocast_ctx = nullcontext()

        # Generate mesh
        with torch.no_grad(), autocast_ctx:
            meshes, _ = model.generate_mesh(
                batch,
                texture_resolution=1024,
                remesh="none",
                vertex_count=-1,
                estimate_illumination=False,
            )
        trimesh_mesh = meshes[0]

        # Reserve a unique path, closing the handle before the exporter
        # writes to it so no file descriptor is leaked.
        with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as temp_file:
            glb_path = temp_file.name

        # Export to GLB
        trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)
        return glb_path

    except Exception as e:
        # Top-level UI boundary: report the failure to the user.
        raise gr.Error(str(e)) from e

# Gradio UI: one uploaded image file in, one downloadable GLB file out.
_image_upload = gr.File(label="Upload Image", file_types=["image"])
_glb_download = gr.File(label="Download GLB", file_types=[".glb"])

demo = gr.Interface(
    fn=process_image,
    inputs=_image_upload,
    outputs=_glb_download,
    title="SPAR3D Image to GLB Converter",
    description="Upload an image (JPG, PNG, or WebP) and get back a 3D model in GLB format",
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()