text-to-3d / gradio_app.py
jbilcke-hf's picture
jbilcke-hf HF staff
Update gradio_app.py
287be50 verified
raw
history blame
3.59 kB
import os
import tempfile
from contextlib import nullcontext
from typing import Any

import gradio as gr
import numpy as np
import torch
import trimesh
from PIL import Image
from transparent_background import Remover
# Import and setup SPAR3D
# Runs pip in a shell at import time to build/install the two local packages
# ./texture_baker and ./uv_unwrapper, with USE_CUDA=1 set in the environment.
# The exit status returned by os.system is ignored, so a failed build is not
# detected here.
# NOTE(review): presumably spar3d needs these packages at import time, which
# is why the install precedes the spar3d imports below - confirm.
os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
import spar3d.utils as spar3d_utils
from spar3d.system import SPAR3D
# Conditioning-view settings shared by the batch builder and the camera setup.
COND_WIDTH, COND_HEIGHT = 512, 512  # size the input image is resized to
COND_DISTANCE = 2.2  # argument for default_cond_c2w
COND_FOVY = 0.591627  # argument for create_intrinsic_from_fov_rad
BACKGROUND_COLOR = [0.5, 0.5, 0.5]  # RGB blended in where the mask is 0

# Everything below is created once, when the module is imported.
device = spar3d_utils.get_device()
bg_remover = Remover()

# Load the pretrained SPAR3D weights, switch to eval mode, move to the device.
model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
model = model.eval().to(device)

# Fixed camera parameters for the conditioning view (reused for every request).
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
(
    intrinsic,
    intrinsic_normed_cond,
) = spar3d_utils.create_intrinsic_from_fov_rad(COND_FOVY, COND_HEIGHT, COND_WIDTH)
def create_batch(input_image: Image.Image) -> dict[str, Any]:
    """Prepare the single-image batch dictionary fed to the model.

    The image is resized to ``(COND_WIDTH, COND_HEIGHT)``, scaled to the
    ``[0, 1]`` range, and composited over ``BACKGROUND_COLOR`` using its
    alpha channel as the blend weight.

    Args:
        input_image: PIL image to condition on. Images that are not already
            in ``RGBA`` mode are converted to ``RGBA`` first, so an image
            without transparency is treated as fully opaque.

    Returns:
        A dict with the keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
        ``intrinsic_cond`` and ``intrinsic_normed_cond``; every value has a
        leading batch dimension of size 1 added with ``unsqueeze(0)``.
    """
    # The last channel is used as the foreground mask below, so make sure it
    # really is an alpha channel (otherwise e.g. the blue channel of an RGB
    # image would silently be used as the mask).
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")

    resized = input_image.resize((COND_WIDTH, COND_HEIGHT))
    pixels = np.asarray(resized).astype(np.float32) / 255.0
    img_cond = torch.from_numpy(pixels).clip(0, 1)

    # Keep the channel axis (-1:) so the mask broadcasts against the RGB data.
    mask_cond = img_cond[:, :, -1:]
    background = torch.tensor(BACKGROUND_COLOR)[None, None, :]
    # lerp(bg, fg, mask): background where mask == 0, image where mask == 1.
    rgb_cond = torch.lerp(background, img_cond[:, :, :3], mask_cond)

    return {
        "rgb_cond": rgb_cond.unsqueeze(0),
        "mask_cond": mask_cond.unsqueeze(0),
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }
def process_image(image_path: str) -> str:
    """Turn the image at *image_path* into a textured mesh and return a GLB path.

    Pipeline: load the image, remove the background with ``bg_remover`` when
    the image is not already ``RGBA``, crop around the foreground, build the
    model batch, generate the mesh, and export it as a ``.glb`` file.

    Args:
        image_path: Path of the uploaded image file.

    Returns:
        Path of the exported ``.glb`` file. The file is created with
        ``delete=False`` and is therefore not removed automatically.

    Raises:
        gr.Error: If any step fails. The original exception is chained as the
            cause. (Returning the message instead would make the ``gr.File``
            output treat the error text as a file path.)
    """
    try:
        # Copy the pixels so the underlying file handle is released right away.
        with Image.open(image_path) as opened:
            input_image = opened.copy()

        # Remove background if needed
        if input_image.mode != "RGBA":
            input_image = bg_remover.process(input_image.convert("RGB"))

        # Auto crop
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,  # Default padding ratio
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False,
        )

        # Prepare batch and move every tensor to the model's device
        batch = {
            key: value.to(device) for key, value in create_batch(input_image).items()
        }

        # bfloat16 autocast only on CUDA; elsewhere run without autocast.
        # device_type must be the bare type ("cuda"), not e.g. "cuda:0".
        if "cuda" in str(device):
            autocast_ctx = torch.autocast(device_type="cuda", dtype=torch.bfloat16)
        else:
            autocast_ctx = nullcontext()

        # Generate mesh
        with torch.no_grad(), autocast_ctx:
            meshes, _ = model.generate_mesh(
                batch,
                1024,  # <- texture_resolution
                remesh="none",
                vertex_count=-1,
                estimate_illumination=True,
            )
        mesh = meshes[0]

        # Reserve a unique path, then close the handle before exporting so the
        # descriptor is not leaked and the exporter can open the file itself.
        with tempfile.NamedTemporaryFile(suffix=".glb", delete=False) as temp_file:
            glb_path = temp_file.name
        mesh.export(glb_path, file_type="glb", include_normals=True)
        return glb_path
    except Exception as e:
        # Surface the failure in the Gradio UI instead of returning the
        # message as if it were a file path.
        raise gr.Error(str(e)) from e
# Gradio UI: one uploaded image file in, one downloadable GLB file out.
_image_upload = gr.File(label="Upload Image", file_types=["image"])
_glb_download = gr.File(label="Download GLB", file_types=[".glb"])

demo = gr.Interface(
    fn=process_image,
    inputs=_image_upload,
    outputs=_glb_download,
    title="SPAR3D Image to GLB Converter",
    description="Upload an image (JPG, PNG, or WebP) and get back a 3D model in GLB format",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()