Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,635 Bytes
5af1f4a fa09fe7 f439686 3f0fac3 0b32f48 fa09fe7 f439686 0b32f48 5af7e79 3f0fac3 f439686 fa09fe7 4898aeb fa09fe7 5af1f4a fa09fe7 3d535fa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# `spaces` stays first, as in the original: the ZeroGPU package expects to be
# imported before CUDA-related packages such as torch.
import spaces

import os
import subprocess
import tempfile
import time

import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline
from gradio_litmodel3d import LitModel3D
from huggingface_hub import hf_hub_download
from huggingface_hub import login
from PIL import Image

import sf3d.utils as sf3d_utils
from sf3d.system import SF3D
import shutil

# Hugging Face access token taken from the environment (None when unset).
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")

# Global numeric/device configuration shared by the pipelines below.
torch.backends.cuda.matmul.allow_tf32 = True
dtype = torch.bfloat16
device = torch.device("cuda")
def find_cuda():
    """Locate the CUDA toolkit installation directory.

    ``CUDA_HOME`` and then ``CUDA_PATH`` are consulted; the first one that
    names an existing path is returned. If neither does, the location is
    derived from the ``nvcc`` executable found on ``PATH``.

    Returns:
        The installation path as a string, or ``None`` when no candidate
        could be found.
    """
    # Check each variable on its own so that a stale CUDA_HOME does not
    # hide a valid CUDA_PATH.
    for env_var in ("CUDA_HOME", "CUDA_PATH"):
        candidate = os.environ.get(env_var)
        if candidate and os.path.exists(candidate):
            return candidate

    # Fall back to searching the system PATH for the nvcc executable.
    nvcc_path = shutil.which("nvcc")
    if nvcc_path:
        # Drop the trailing 'bin/nvcc' components to get the install root.
        return os.path.dirname(os.path.dirname(nvcc_path))

    return None
# Report at startup where (or whether) a CUDA installation was located.
cuda_path = find_cuda()
print(
    f"CUDA installation found at: {cuda_path}"
    if cuda_path
    else "CUDA installation not found"
)

# Authenticate against the Hugging Face Hub with the token from the environment.
login(token=huggingface_token)
# Set up environment and cache: keep downloads in a "models" directory that
# sits next to this script.
# NOTE(review): huggingface_hub is imported before these variables are set;
# confirm the library still picks them up at this point.
cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
os.environ.update(
    dict.fromkeys(("TRANSFORMERS_CACHE", "HF_HUB_CACHE", "HF_HOME"), cache_path)
)
if not os.path.exists(cache_path):
    os.makedirs(cache_path, exist_ok=True)
# Initialize Flux pipeline
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=dtype,
    token=huggingface_token,
)
pipe = pipe.to(device)

# Download the Hyper-SD 8-step LoRA file, load it and fuse it at scale 0.125.
_hyper_lora_file = hf_hub_download(
    "ByteDance/Hyper-SD",
    "Hyper-FLUX.1-dev-8steps-lora.safetensors",
)
pipe.load_lora_weights(_hyper_lora_file)
pipe.fuse_lora(lora_scale=0.125)

# Move/cast the pipeline once more after the LoRA has been fused.
pipe.to(device="cuda", dtype=torch.bfloat16)
# Initialize SF3D model: load the pretrained weights, then switch to eval
# mode and move the model to the target device.
sf3d_model = SF3D.from_pretrained(
    "stabilityai/stable-fast-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
sf3d_model = sf3d_model.eval().to(device)
# Constants for SF3D
COND_WIDTH = 512
COND_HEIGHT = 512
COND_DISTANCE = 1.6
COND_FOVY_DEG = 40
BACKGROUND_COLOR = [0.5, 0.5, 0.5]

# Conditioning camera, computed once at import time and reused per request.
c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
    COND_FOVY_DEG,
    COND_HEIGHT,
    COND_WIDTH,
)
def generate_image(prompt, height, width, steps, scales, seed):
    """Run the Flux pipeline for one prompt and return the first image.

    Args:
        prompt: Text description passed to the pipeline.
        height: Output height; coerced to ``int``.
        width: Output width; coerced to ``int``.
        steps: Number of inference steps; coerced to ``int``.
        scales: Guidance scale; coerced to ``float``.
        seed: Seed for the random generator; coerced to ``int``.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        rng = torch.Generator().manual_seed(int(seed))
        call_kwargs = {
            "prompt": [prompt],
            "generator": rng,
            "num_inference_steps": int(steps),
            "guidance_scale": float(scales),
            "height": int(height),
            "width": int(width),
            "max_sequence_length": 256,
        }
        result = pipe(**call_kwargs)
        return result.images[0]
def create_batch(input_image: Image.Image) -> dict:
    """Build the single-element conditioning batch consumed by SF3D.

    The image is converted to RGBA and resized to
    ``(COND_WIDTH, COND_HEIGHT)``. Its alpha channel becomes the mask, and
    the RGB channels are blended onto ``BACKGROUND_COLOR`` using that mask.

    Args:
        input_image: Source image in any PIL mode. Images without an alpha
            channel are treated as fully opaque.

    Returns:
        A dict with keys ``rgb_cond``, ``mask_cond``, ``c2w_cond``,
        ``intrinsic_cond`` and ``intrinsic_normed_cond``, each tensor with
        a leading batch dimension of size 1.
    """
    # Guarantee an alpha channel: for an RGB image (e.g. straight from the
    # text-to-image pipeline) the last channel would otherwise be blue and
    # be misused as the mask. RGBA inputs are unaffected.
    rgba_image = input_image.convert("RGBA").resize((COND_WIDTH, COND_HEIGHT))

    # (H, W, 4) float tensor scaled to [0, 1].
    img_cond = torch.from_numpy(
        np.asarray(rgba_image).astype(np.float32) / 255.0
    ).float().clip(0, 1)

    # Alpha as a (H, W, 1) mask; keep the trailing axis for broadcasting.
    mask_cond = img_cond[:, :, -1:]
    # Blend towards the background colour where the mask is transparent.
    rgb_cond = torch.lerp(
        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
    )

    batch_elem = {
        "rgb_cond": rgb_cond,
        "mask_cond": mask_cond,
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }
    # Prepend the batch dimension to every entry.
    return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
def generate_3d_model(input_image):
    """Reconstruct a mesh from an image with SF3D and export it as GLB.

    Args:
        input_image: Image handed to ``create_batch`` to build the model
            input.

    Returns:
        Path of a newly created temporary ``.glb`` file. The file is not
        deleted by this function.
    """
    with torch.no_grad():
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            model_batch = create_batch(input_image)
            model_batch = {k: v.cuda() for k, v in model_batch.items()}
            trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
            trimesh_mesh = trimesh_mesh[0]

    # Reserve a unique output path, then close the handle before exporting
    # so no file descriptor is leaked and the exporter writes to a file that
    # is not held open here.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".glb") as tmp_file:
        glb_path = tmp_file.name

    trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)
    return glb_path
@spaces.GPU
def process_and_generate(prompt, height, width, steps, scales, seed):
    """Turn a text prompt into an image and then into a 3D model.

    All arguments are forwarded unchanged to ``generate_image``; the image
    it produces is then passed to ``generate_3d_model``.

    Returns:
        A ``(image, glb_path)`` tuple: the generated image and the path of
        the exported GLB file.
    """
    # Stage 1: text -> image.
    image = generate_image(prompt, height, width, steps, scales, seed)
    # Stage 2: image -> mesh file.
    mesh_path = generate_3d_model(image)
    return image, mesh_path
# Gradio interface
# NOTE(review): the nesting of the layout blocks was reconstructed from a
# flattened source; confirm the placement of the seed field and the button.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Text-to-3D Model Generator")

    with gr.Row():
        # Left column: prompt, advanced settings and the trigger button.
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Your Image Description", lines=3)
            with gr.Accordion("Advanced Settings", open=False):
                height = gr.Slider(
                    label="Height", minimum=256, maximum=1152, step=64, value=1024
                )
                width = gr.Slider(
                    label="Width", minimum=256, maximum=1152, step=64, value=1024
                )
                steps = gr.Slider(
                    label="Inference Steps", minimum=6, maximum=25, step=1, value=8
                )
                scales = gr.Slider(
                    label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5
                )
                seed = gr.Number(label="Seed", value=3413, precision=0)
            generate_btn = gr.Button("Generate 3D Model", variant="primary")

        # Right column: the generated image and the resulting 3D model.
        with gr.Column(scale=4):
            output_image = gr.Image(label="Generated Image")
            output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])

    # Wire the button to the full text -> image -> mesh pipeline.
    generate_btn.click(
        fn=process_and_generate,
        inputs=[prompt, height, width, steps, scales, seed],
        outputs=[output_image, output_3d],
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()