File size: 3,858 Bytes
38dbec8
eecf990
38dbec8
 
 
eecf990
38dbec8
eecf990
 
38dbec8
 
eecf990
a399d55
e2ccc8a
 
38dbec8
eecf990
e2ccc8a
 
 
 
 
38dbec8
eecf990
e2ccc8a
eecf990
e2ccc8a
 
 
eecf990
 
38dbec8
eecf990
 
 
 
 
38dbec8
 
eecf990
38dbec8
 
 
 
 
 
 
 
 
 
 
 
 
eecf990
 
 
38dbec8
 
 
 
eecf990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38dbec8
eecf990
38dbec8
 
eecf990
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38dbec8
eecf990
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import base64
import io
import os
import subprocess
import sys
import tempfile
from contextlib import nullcontext
from typing import Any

import gradio as gr
import numpy as np
import torch
import trimesh
from PIL import Image
from transparent_background import Remover

# Import and setup SPAR3D
# Build/install the local texture_baker and uv_unwrapper packages at start-up
# (presumably required by spar3d -- confirm against the SPAR3D repo).
# pip is invoked through the running interpreter with an argument list, so no
# shell is involved and the packages land in this environment; USE_CUDA=1 is
# passed through the child environment exactly as the shell prefix did.
_install_result = subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-vv",
        "--no-build-isolation",
        "./texture_baker",
        "./uv_unwrapper",
    ],
    env={**os.environ, "USE_CUDA": "1"},
    check=False,
)
if _install_result.returncode != 0:
    # Keep start-up best-effort (the packages may already be installed), but
    # do not hide the failure.
    print(
        "WARNING: installing texture_baker/uv_unwrapper failed with exit code "
        f"{_install_result.returncode}",
        file=sys.stderr,
    )
import spar3d.utils as spar3d_utils
from spar3d.system import SPAR3D

# Constants
# Conditioning image resolution in pixels; input images are resized to this.
COND_WIDTH = COND_HEIGHT = 512
# Passed to spar3d_utils.default_cond_c2w to build the conditioning camera pose.
COND_DISTANCE = 2.2
# Vertical field of view in radians (fed to create_intrinsic_from_fov_rad).
COND_FOVY = 0.591627
# RGB color blended in wherever the foreground mask is zero (mid gray).
BACKGROUND_COLOR = [0.5] * 3

# Initialize models
# Everything here runs once at import time and is shared by all requests.
device = spar3d_utils.get_device()
bg_remover = Remover()
model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
# Switch to eval mode, then move the model onto the selected device.
model = model.eval()
model = model.to(device)

# Initialize camera parameters
# Fixed conditioning camera: pose from the camera distance, intrinsics from
# the field of view and the conditioning image size.
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
_cond_intrinsics = spar3d_utils.create_intrinsic_from_fov_rad(
    COND_FOVY, COND_HEIGHT, COND_WIDTH
)
intrinsic, intrinsic_normed_cond = _cond_intrinsics

def create_batch(input_image: Image.Image) -> dict[str, Any]:
    """Prepare the single-image conditioning batch for model input.

    The image is resized to ``COND_WIDTH`` x ``COND_HEIGHT``, scaled to
    ``[0, 1]``, and its alpha channel is used as the foreground mask to blend
    the RGB channels over ``BACKGROUND_COLOR``.

    Args:
        input_image: PIL image to condition on. Images that are not already
            RGBA are converted to RGBA first, so the last channel is always
            alpha (fully opaque for modes that carry no transparency).

    Returns:
        A dict with keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
        ``intrinsic_cond`` and ``intrinsic_normed_cond``; every value has a
        leading batch dimension of size 1 added with ``unsqueeze(0)``.
    """
    # The slicing below assumes four channels with alpha last. Without this
    # normalization an RGB image would use its blue channel as the mask.
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")

    resized = input_image.resize((COND_WIDTH, COND_HEIGHT))
    img_cond = (
        torch.from_numpy(np.asarray(resized).astype(np.float32) / 255.0)
        .float()
        .clip(0, 1)
    )

    # Keep the channel axis on the mask (H, W, 1) so it broadcasts over RGB.
    mask_cond = img_cond[:, :, -1:]
    # lerp(background, rgb, mask): background where mask == 0, image where 1.
    rgb_cond = torch.lerp(
        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
    )

    batch = {
        "rgb_cond": rgb_cond.unsqueeze(0),
        "mask_cond": mask_cond.unsqueeze(0),
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }
    return batch

def process_image(image_base64: str) -> str:
    """Process a base64-encoded image and return a GLB mesh as base64.

    Steps: decode the image, remove the background when the image is not
    already RGBA, crop around the foreground, run the model to generate a
    mesh, export it as GLB and base64-encode the file contents.

    Args:
        image_base64: Base64-encoded image bytes in any format Pillow can
            open. A ``data:<mime>;base64,`` data-URL prefix is accepted and
            stripped.

    Returns:
        The base64-encoded GLB file on success. On any failure the
        exception message is returned instead; no exception propagates to
        the caller.
    """
    glb_path = None
    try:
        # Decode base64 image. Valid base64 never contains ':' or ',', so
        # stripping a data-URL header cannot affect plain base64 input.
        payload = image_base64.strip()
        if payload.startswith("data:") and "," in payload:
            payload = payload.split(",", 1)[1]
        image_data = base64.b64decode(payload)
        # Decode from an in-memory buffer holding the actual image bytes.
        input_image = Image.open(io.BytesIO(image_data))

        # Remove background if needed
        if input_image.mode != 'RGBA':
            input_image = bg_remover.process(input_image.convert("RGB"))

        # Auto crop
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,  # Default padding ratio
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False
        )

        # Prepare batch
        batch = create_batch(input_image)
        batch = {k: v.to(device) for k, v in batch.items()}

        # Generate mesh; bfloat16 autocast is only enabled on CUDA devices.
        if "cuda" in str(device):
            autocast_ctx = torch.autocast(device_type=device, dtype=torch.bfloat16)
        else:
            autocast_ctx = nullcontext()
        with torch.no_grad(), autocast_ctx:
            trimesh_mesh, _ = model.generate_mesh(
                batch,
                texture_resolution=1024,
                remesh="none",
                vertex_count=-1,
                estimate_illumination=False
            )
            trimesh_mesh = trimesh_mesh[0]

        # Export to GLB. The handle is closed right away so only the path is
        # used; the file itself is removed in the ``finally`` clause.
        with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as temp_file:
            glb_path = temp_file.name
        trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)

        # Convert to base64
        with open(glb_path, 'rb') as f:
            glb_base64 = base64.b64encode(f.read()).decode('utf-8')

        return glb_base64

    except Exception as e:
        # Callers receive the failure as text; this is the function's
        # established contract.
        return str(e)

    finally:
        # Cleanup runs on success and failure so the temp file never leaks.
        if glb_path is not None:
            try:
                os.unlink(glb_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask
                # the real result.
                pass

# Create Gradio interface
# Plain text in, plain text out: both the image and the GLB travel as base64.
_image_input = gr.Text(label="Base64 Image")
_glb_output = gr.Text(label="Base64 GLB")
demo = gr.Interface(
    fn=process_image,
    inputs=_image_input,
    outputs=_glb_output,
    title="SPAR3D Image to GLB Converter",
    description="Upload a base64-encoded image and get back a base64-encoded GLB file",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(share=False)