text-to-3d

Running on L40S

App Files Files Community

text-to-3d / gradio_app.py

jbilcke-hf HF staff

Update gradio_app.py

eecf990 verified 2 months ago

raw

history blame

3.86 kB

	import base64
	import io
	import os
	import subprocess
	import sys
	import tempfile
	from contextlib import nullcontext
	from typing import Any

	import gradio as gr
	import numpy as np
	import torch
	import trimesh
	from PIL import Image
	from transparent_background import Remover

	# Install the local texture_baker and uv_unwrapper packages, then import SPAR3D.
	# The command is passed as an argument list (no shell involved) and goes through
	# ``sys.executable -m pip`` so the packages land in the interpreter running
	# this app. USE_CUDA=1 is handed to the build through the environment.
	_install = subprocess.run(
	    [
	        sys.executable, "-m", "pip", "install", "-vv", "--no-build-isolation",
	        "./texture_baker", "./uv_unwrapper",
	    ],
	    env={**os.environ, "USE_CUDA": "1"},
	    check=False,
	)
	if _install.returncode != 0:
	    # Stay best-effort (the packages may already be installed), but do not
	    # hide the failure: a later ImportError is much easier to diagnose when
	    # the failed install is visible in the logs.
	    print(
	        f"pip install of texture_baker/uv_unwrapper exited with status "
	        f"{_install.returncode}",
	        file=sys.stderr,
	    )
	import spar3d.utils as spar3d_utils
	from spar3d.system import SPAR3D

	# Settings of the conditioning view, shared by the camera setup and by
	# create_batch().
	COND_WIDTH = COND_HEIGHT = 512  # size the input image is resized to
	COND_DISTANCE = 2.2  # argument of spar3d_utils.default_cond_c2w
	COND_FOVY = 0.591627  # argument of create_intrinsic_from_fov_rad (presumably radians)
	BACKGROUND_COLOR = [0.5] * 3  # RGB value blended in where the mask is 0

	# Create the background remover and the SPAR3D model once, at import time,
	# so every request reuses the same instances.
	device = spar3d_utils.get_device()
	bg_remover = Remover()
	model = SPAR3D.from_pretrained(
	    "stabilityai/stable-point-aware-3d",
	    config_name="config.yaml",
	    weight_name="model.safetensors",
	)
	model = model.eval()
	model = model.to(device)

	# Camera of the conditioning view: pose from the configured distance,
	# intrinsics (raw and normalised) from the configured field of view and size.
	c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
	intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(COND_FOVY, COND_HEIGHT, COND_WIDTH)

	def create_batch(input_image: Image.Image) -> dict[str, Any]:
	    """Build the single-sample conditioning batch for the model.

	    Args:
	        input_image: PIL image of the object. Its alpha channel is used as
	            the foreground mask; an image without alpha is treated as fully
	            opaque.

	    Returns:
	        A dict with the keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
	        ``intrinsic_cond`` and ``intrinsic_normed_cond``. Every value has a
	        leading batch axis of size 1 added with ``unsqueeze(0)``.
	    """
	    # Guarantee four channels so that the last one really is alpha. Without
	    # this an RGB image would have its blue channel used as the mask and a
	    # single-channel image would fail on the 3-D indexing below.
	    rgba = input_image.convert("RGBA").resize((COND_WIDTH, COND_HEIGHT))

	    # (H, W, 4) float32 in [0, 1]; the division already yields float32.
	    img_cond = torch.from_numpy(
	        np.asarray(rgba).astype(np.float32) / 255.0
	    ).clip(0, 1)

	    # Keep the channel axis on the mask so it broadcasts against RGB.
	    mask_cond = img_cond[:, :, -1:]
	    # Composite over the flat background: mask 0 -> background, 1 -> image.
	    rgb_cond = torch.lerp(
	        torch.tensor(BACKGROUND_COLOR)[None, None, :],
	        img_cond[:, :, :3],
	        mask_cond,
	    )

	    return {
	        "rgb_cond": rgb_cond.unsqueeze(0),
	        "mask_cond": mask_cond.unsqueeze(0),
	        "c2w_cond": c2w_cond.unsqueeze(0),
	        "intrinsic_cond": intrinsic.unsqueeze(0),
	        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
	    }

	def process_image(image_base64: str) -> str:
	    """Turn a base64-encoded image into a base64-encoded GLB mesh.

	    Args:
	        image_base64: Base64 text of an encoded image file.

	    Returns:
	        The generated GLB file as base64 text. If any step fails, the
	        exception message is returned instead (nothing is raised).
	    """
	    # NOTE(review): an error message comes back through the same channel as
	    # a successful result; kept that way so existing callers are unaffected.
	    try:
	        # Decode and parse the image straight from memory.
	        image_data = base64.b64decode(image_base64)
	        input_image = Image.open(io.BytesIO(image_data))

	        # An image that is not RGBA has no alpha mask yet: run it through
	        # the background remover.
	        if input_image.mode != "RGBA":
	            input_image = bg_remover.process(input_image.convert("RGB"))

	        # Auto crop
	        input_image = spar3d_utils.foreground_crop(
	            input_image,
	            crop_ratio=1.3,  # Default padding ratio
	            newsize=(COND_WIDTH, COND_HEIGHT),
	            no_crop=False,
	        )

	        # Prepare batch
	        batch = create_batch(input_image)
	        batch = {k: v.to(device) for k, v in batch.items()}

	        # bfloat16 autocast only on CUDA. ``device_type`` must be the bare
	        # backend name, so pass "cuda" even if the device is e.g. "cuda:0".
	        if "cuda" in str(device):
	            autocast_ctx = torch.autocast(device_type="cuda", dtype=torch.bfloat16)
	        else:
	            autocast_ctx = nullcontext()

	        # Generate mesh
	        with torch.no_grad(), autocast_ctx:
	            meshes, _ = model.generate_mesh(
	                batch,
	                texture_resolution=1024,
	                remesh="none",
	                vertex_count=-1,
	                estimate_illumination=False,
	            )
	        mesh = meshes[0]

	        # Export through a temporary directory: it is removed together with
	        # the GLB file even when the export or the read fails.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            glb_path = os.path.join(tmp_dir, "mesh.glb")
	            mesh.export(glb_path, file_type="glb", include_normals=True)
	            with open(glb_path, "rb") as f:
	                return base64.b64encode(f.read()).decode("utf-8")

	    except Exception as e:
	        return str(e)

	# Gradio front end: one text box in (base64 image), one text box out
	# (base64 GLB), both wired to process_image.
	demo = gr.Interface(
	    process_image,
	    gr.Text(label="Base64 Image"),
	    gr.Text(label="Base64 GLB"),
	    title="SPAR3D Image to GLB Converter",
	    description="Upload a base64-encoded image and get back a base64-encoded GLB file",
	)

	# Start the server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch(share=False)