# gradio_app.py -- from the "text-to-3d" Hugging Face Space.
# Revision eecf990 ("Update gradio_app.py", by jbilcke-hf, HF staff), 3.86 kB.
import base64
import io
import os
import tempfile
from contextlib import nullcontext
from typing import Any

import gradio as gr
import numpy as np
import torch
import trimesh
from PIL import Image
from transparent_background import Remover
# Import and setup SPAR3D
# The two local packages (./texture_baker and ./uv_unwrapper) are pip-installed
# at startup, with USE_CUDA=1 set for the install command, before the spar3d
# imports below are executed. The order of these statements is intentional.
# NOTE(review): the exit status of os.system is ignored, so a failed install is
# not reported here -- presumably it would surface later as an import/runtime
# error inside spar3d; confirm.
os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
import spar3d.utils as spar3d_utils
from spar3d.system import SPAR3D
# Constants
# Size the conditioning image is resized to (passed to Image.resize as
# (COND_WIDTH, COND_HEIGHT)) and used when building the camera intrinsics.
COND_WIDTH = 512
COND_HEIGHT = 512
# Argument to spar3d_utils.default_cond_c2w when building the conditioning
# camera pose; presumably the camera-to-object distance -- confirm.
COND_DISTANCE = 2.2
# Field of view in radians (it is passed to create_intrinsic_from_fov_rad);
# presumably the vertical FOV, going by the name -- confirm.
COND_FOVY = 0.591627
# Colour the image is blended over wherever the mask is 0 in create_batch.
# Three components compared against pixel values scaled to [0, 1].
BACKGROUND_COLOR = [0.5, 0.5, 0.5]
# Initialize models
# Everything below runs once at import time and is shared by all requests.
device = spar3d_utils.get_device()
bg_remover = Remover()

# Load the pretrained SPAR3D weights, then switch to eval mode and move the
# model onto the selected device.
model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
model = model.eval().to(device)

# Initialize camera parameters
# Fixed conditioning camera: a pose derived from COND_DISTANCE plus intrinsics
# (raw and normalized) derived from the field of view and image size.
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
    COND_FOVY,
    COND_HEIGHT,
    COND_WIDTH,
)
def create_batch(input_image: Image) -> dict[str, Any]:
    """Build the single-item conditioning batch for the model from one image.

    The image is resized to COND_WIDTH x COND_HEIGHT and scaled to [0, 1].
    Its last channel is used as the foreground mask, and the first three
    channels are blended over BACKGROUND_COLOR with that mask. The module-level
    camera pose and intrinsics are included unchanged.

    Args:
        input_image: Image to condition on. The last channel is treated as the
            mask, so this presumably must be RGBA -- confirm against callers.

    Returns:
        A dict with keys "rgb_cond", "mask_cond", "c2w_cond", "intrinsic_cond"
        and "intrinsic_normed_cond", each with a leading batch dimension of 1.
    """
    resized = input_image.resize((COND_WIDTH, COND_HEIGHT))
    pixels = np.asarray(resized).astype(np.float32) / 255.0
    img_cond = torch.from_numpy(pixels).float().clip(0, 1)

    # mask == 1 keeps the image colour, mask == 0 shows the background colour.
    mask_cond = img_cond[:, :, -1:]
    background = torch.tensor(BACKGROUND_COLOR)[None, None, :]
    rgb_cond = torch.lerp(background, img_cond[:, :, :3], mask_cond)

    unbatched = {
        "rgb_cond": rgb_cond,
        "mask_cond": mask_cond,
        "c2w_cond": c2w_cond,
        "intrinsic_cond": intrinsic,
        "intrinsic_normed_cond": intrinsic_normed_cond,
    }
    # Add the batch dimension to every entry in one place.
    return {key: value.unsqueeze(0) for key, value in unbatched.items()}
def process_image(image_base64: str) -> str:
    """Convert a base64-encoded image into a base64-encoded GLB mesh.

    Steps: decode the image, remove the background unless the image is already
    RGBA, crop to the foreground, build the model batch, generate the mesh and
    export it as GLB.

    Args:
        image_base64: Base64 text of an encoded image file in any format
            Pillow can open. A leading ``data:...;base64,`` data-URL header
            is tolerated and stripped.

    Returns:
        The GLB file contents as base64 text. If any step fails, the exception
        message is returned instead: errors are reported in-band rather than
        raised, so callers cannot distinguish them by type.
    """
    try:
        # Drop a data-URL header. b64decode silently discards characters
        # outside the base64 alphabet, so the header would otherwise be folded
        # into the payload and corrupt the image bytes.
        if image_base64.startswith("data:"):
            image_base64 = image_base64.partition(",")[2]

        # Decode base64 image and parse it from memory.
        image_data = base64.b64decode(image_base64)
        input_image = Image.open(io.BytesIO(image_data))

        # Remove background if needed
        if input_image.mode != 'RGBA':
            input_image = bg_remover.process(input_image.convert("RGB"))

        # Auto crop
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,  # Default padding ratio
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False,
        )

        # Prepare batch
        batch = create_batch(input_image)
        batch = {k: v.to(device) for k, v in batch.items()}

        # Mixed precision is only enabled on CUDA. device_type is given as
        # plain "cuda" so an indexed device string such as "cuda:0" also works.
        if "cuda" in device:
            autocast_ctx = torch.autocast(device_type="cuda", dtype=torch.bfloat16)
        else:
            autocast_ctx = nullcontext()

        # Generate mesh
        with torch.no_grad(), autocast_ctx:
            trimesh_mesh, _ = model.generate_mesh(
                batch,
                texture_resolution=1024,
                remesh="none",
                vertex_count=-1,
                estimate_illumination=False,
            )
            trimesh_mesh = trimesh_mesh[0]

        # Export to GLB inside a temporary directory so the file is removed
        # even if the export or the read-back raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            glb_path = os.path.join(tmp_dir, "mesh.glb")
            trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)

            # Convert to base64
            with open(glb_path, 'rb') as f:
                return base64.b64encode(f.read()).decode('utf-8')
    except Exception as e:
        # Existing contract: the error message is handed back as the result.
        return str(e)
# Create Gradio interface
# Text in, text out: the image arrives as a base64 string and the generated
# GLB is returned as a base64 string.
demo = gr.Interface(
    process_image,
    gr.Text(label="Base64 Image"),
    gr.Text(label="Base64 GLB"),
    title="SPAR3D Image to GLB Converter",
    description="Upload a base64-encoded image and get back a base64-encoded GLB file",
)

if __name__ == "__main__":
    # Start the app only when run as a script; no public share link is requested.
    demo.launch(share=False)