# Hugging Face Spaces page header captured with this file: "Running on Zero".
import os
import tempfile
import time

import gradio as gr
import numpy as np
import torch
from diffusers import FluxPipeline
from gradio_litmodel3d import LitModel3D
from huggingface_hub import hf_hub_download
from PIL import Image
from sf3d.system import SF3D
import sf3d.utils as sf3d_utils
# Runtime configuration shared by the rest of the module.
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16
# Let CUDA matmuls use TF32 (faster, slightly lower precision).
torch.backends.cuda.matmul.allow_tf32 = True
# Hugging Face access token; None when HUGGINGFACE_TOKEN is not set.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

# Set up environment and cache: downloads go to a "models" directory that
# sits next to this file.
cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path
# NOTE(review): these variables are assigned after the Hugging Face libraries
# have been imported; libraries that read them at import time may ignore the
# values - confirm the cache really lands in `cache_path`.

# exist_ok=True already tolerates an existing directory, so no separate
# existence check is needed (and none can race with another process).
os.makedirs(cache_path, exist_ok=True)
# Initialize Flux pipeline
# Load the FLUX.1-dev weights in bfloat16, fuse the Hyper-SD 8-step LoRA into
# them, then move the whole pipeline to the GPU.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
    token=huggingface_token,
)
_hyper_lora_file = hf_hub_download(
    "ByteDance/Hyper-SD",
    "Hyper-FLUX.1-dev-8steps-lora.safetensors",
)
pipe.load_lora_weights(_hyper_lora_file)
pipe.fuse_lora(lora_scale=0.125)
pipe.to(device="cuda", dtype=torch.bfloat16)
# Initialize SF3D model
# Checkpoint files to fetch from the "stabilityai/stable-fast-3d" repository.
_sf3d_checkpoint = dict(
    config_name="config.yaml",
    weight_name="model.safetensors",
    token=huggingface_token,
)
sf3d_model = SF3D.from_pretrained("stabilityai/stable-fast-3d", **_sf3d_checkpoint)
# Inference only: switch to eval mode and place the model on the GPU.
sf3d_model.eval().cuda()
# Constants for SF3D
# Size (pixels) the conditioning image is resized to before inference.
COND_WIDTH = 512
COND_HEIGHT = 512
# Conditioning camera parameters handed to the sf3d helpers below
# (presumably camera distance and vertical field of view in degrees -
# confirm against sf3d.utils).
COND_DISTANCE = 1.6
COND_FOVY_DEG = 40
# RGB value blended in wherever the conditioning mask is zero.
BACKGROUND_COLOR = [0.5, 0.5, 0.5]

# Camera tensors are computed once at import time and reused for every request.
c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
_intrinsics = sf3d_utils.create_intrinsic_from_fov_deg(
    COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
)
intrinsic, intrinsic_normed_cond = _intrinsics
def generate_image(prompt, height, width, steps, scales, seed):
    """Render a single image for ``prompt`` with the module-level Flux pipeline.

    ``height``, ``width``, ``steps`` and ``seed`` are coerced with ``int()``
    and ``scales`` with ``float()`` before being handed to the pipeline.

    Returns:
        The first image of the pipeline output.
    """
    with torch.inference_mode():
        with torch.autocast("cuda", dtype=torch.bfloat16):
            # Seeded generator so the same seed is passed for identical inputs.
            rng = torch.Generator().manual_seed(int(seed))
            result = pipe(
                prompt=[prompt],
                generator=rng,
                num_inference_steps=int(steps),
                guidance_scale=float(scales),
                height=int(height),
                width=int(width),
                max_sequence_length=256,
            )
            return result.images[0]
def create_batch(input_image: Image.Image) -> dict:
    """Build the single-element conditioning batch consumed by the SF3D model.

    The image is converted to RGBA and resized to
    ``(COND_WIDTH, COND_HEIGHT)``. Its alpha channel becomes the foreground
    mask, and the RGB channels are blended over ``BACKGROUND_COLOR`` using
    that mask. Images without transparency (for example plain RGB) get a
    fully opaque mask; no background removal is performed here.

    Args:
        input_image: PIL image in any mode that ``Image.convert("RGBA")``
            accepts.

    Returns:
        Dict with keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
        ``intrinsic_cond`` and ``intrinsic_normed_cond``; every tensor has a
        leading batch dimension of size 1.
    """
    # Force an alpha channel: indexing the last channel of an RGB image would
    # silently use the blue channel as the mask.
    rgba_image = input_image.convert("RGBA").resize((COND_WIDTH, COND_HEIGHT))
    img_cond = torch.from_numpy(
        np.asarray(rgba_image).astype(np.float32) / 255.0
    ).float().clip(0, 1)

    # Alpha in [0, 1], kept as a trailing singleton channel for broadcasting.
    mask_cond = img_cond[:, :, -1:]
    # mask == 0 -> background colour, mask == 1 -> original pixel.
    rgb_cond = torch.lerp(
        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
    )

    batch_elem = {
        "rgb_cond": rgb_cond,
        "mask_cond": mask_cond,
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }
    # Add the batch dimension to every entry.
    return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
def generate_3d_model(input_image):
    """Reconstruct a mesh from ``input_image`` and write it to a ``.glb`` file.

    Args:
        input_image: PIL image passed on to ``create_batch``.

    Returns:
        Path of the exported GLB file. The file is created with
        ``delete=False`` and is not removed by this function.
    """
    with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16):
        model_batch = create_batch(input_image)
        model_batch = {k: v.cuda() for k, v in model_batch.items()}
        # Second argument is presumably the texture resolution - confirm
        # against SF3D.generate_mesh.
        trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
        trimesh_mesh = trimesh_mesh[0]

    # Reserve a unique path and close the handle immediately: the exporter
    # reopens the file by name, and leaving the handle open would leak one
    # descriptor per request.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".glb") as tmp_file:
        glb_path = tmp_file.name
    trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)
    return glb_path
def process_and_generate(prompt, height, width, steps, scales, seed):
    """Run the full text -> image -> 3D pipeline for one request.

    Returns:
        Tuple ``(generated image, path to the exported GLB file)``.
    """
    # Step 1: text prompt -> image.
    image = generate_image(prompt, height, width, steps, scales, seed)
    # Step 2: image -> GLB mesh file.
    return image, generate_3d_model(image)
# Gradio interface
# Height and width sliders share the same range, step and default.
_image_side = dict(minimum=256, maximum=1152, step=64, value=1024)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Text-to-3D Model Generator")

    with gr.Row():
        # Left column: prompt, advanced settings and the trigger button.
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Your Image Description", lines=3)
            with gr.Accordion("Advanced Settings", open=False):
                height = gr.Slider(label="Height", **_image_side)
                width = gr.Slider(label="Width", **_image_side)
                steps = gr.Slider(
                    label="Inference Steps", minimum=6, maximum=25, step=1, value=8
                )
                scales = gr.Slider(
                    label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5
                )
                seed = gr.Number(label="Seed", value=3413, precision=0)
            generate_btn = gr.Button("Generate 3D Model", variant="primary")

        # Right column: the intermediate image and the resulting mesh.
        with gr.Column(scale=4):
            output_image = gr.Image(label="Generated Image")
            output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])

    # Component order must match the parameters and return values of
    # process_and_generate.
    _ui_inputs = [prompt, height, width, steps, scales, seed]
    _ui_outputs = [output_image, output_3d]
    generate_btn.click(fn=process_and_generate, inputs=_ui_inputs, outputs=_ui_outputs)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()