import spaces
import os
import shutil
import subprocess
import tempfile
import time

import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import DiffusionPipeline
from huggingface_hub import hf_hub_download, login
from sf3d.system import SF3D
import sf3d.utils as sf3d_utils
from gradio_litmodel3d import LitModel3D

dtype = torch.bfloat16

torch.backends.cuda.matmul.allow_tf32 = True
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

device = torch.device('cuda')

def find_cuda():
    # Check if CUDA_HOME or CUDA_PATH environment variables are set
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')

    if cuda_home and os.path.exists(cuda_home):
        return cuda_home

    # Search for the nvcc executable in the system's PATH
    nvcc_path = shutil.which('nvcc')

    if nvcc_path:
        # Remove the 'bin/nvcc' part to get the CUDA installation path
        cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
        return cuda_path

    return None

cuda_path = find_cuda()

if cuda_path:
    print(f"CUDA installation found at: {cuda_path}")
else:
    print("CUDA installation not found")

# FLUX.1-dev is a gated model, so logging in with the token (stored as the
# HUGGINGFACE_TOKEN Space secret read above) is required before downloading its weights.
login(token=huggingface_token)

# Set up environment and cache
# (note: these variables are read when huggingface_hub is first imported, so
# setting them this late may not redirect every download)
cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path

os.makedirs(cache_path, exist_ok=True)

# Initialize Flux pipeline
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=dtype, token=huggingface_token
).to(device)
pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
pipe.fuse_lora(lora_scale=0.125)
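# The fused 8-step Hyper-SD LoRA is what is intended to let the pipeline produce
# usable images in roughly 8 inference steps (the Steps slider below defaults to 8).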

# Initialize SF3D model
sf3d_model = SF3D.from_pretrained(
    "stabilityai/stable-fast-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
).eval().to(device)

# Constants for SF3D
COND_WIDTH, COND_HEIGHT = 512, 512
COND_DISTANCE, COND_FOVY_DEG = 1.6, 40
BACKGROUND_COLOR = [0.5, 0.5, 0.5]

c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
    COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
)
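# These constants define the fixed camera assumed for the single conditioning view:
# COND_DISTANCE units from the origin with a COND_FOVY_DEG-degree vertical FOV at
# 512x512; BACKGROUND_COLOR is the neutral grey blended in where the mask is empty
# (see create_batch below).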

def generate_image(prompt, height, width, steps, scales, seed):
    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        return pipe(
            prompt=[prompt],
            generator=torch.Generator().manual_seed(int(seed)),
            num_inference_steps=int(steps),
            guidance_scale=float(scales),
            height=int(height),
            width=int(width),
            max_sequence_length=256
        ).images[0]

def create_batch(input_image: Image.Image) -> dict:
    # SF3D expects an RGBA image whose alpha channel marks the foreground;
    # fall back to a fully opaque alpha if the input has no mask.
    if input_image.mode != "RGBA":
        input_image = input_image.convert("RGBA")
    img_cond = torch.from_numpy(
        np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32) / 255.0
    ).float().clip(0, 1)
    mask_cond = img_cond[:, :, -1:]
    rgb_cond = torch.lerp(
        torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
    )

    batch_elem = {
        "rgb_cond": rgb_cond,
        "mask_cond": mask_cond,
        "c2w_cond": c2w_cond.unsqueeze(0),
        "intrinsic_cond": intrinsic.unsqueeze(0),
        "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
    }
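    # The dict comprehension below adds the batch dimension; the camera tensors
    # also carry a view dimension from the unsqueeze above, matching the single
    # conditioning view SF3D expects.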
    return {k: v.unsqueeze(0) for k, v in batch_elem.items()}

def generate_3d_model(input_image):
    with torch.no_grad():
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            model_batch = create_batch(input_image)
            model_batch = {k: v.cuda() for k, v in model_batch.items()}
            trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
            trimesh_mesh = trimesh_mesh[0]

    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".glb")
    trimesh_mesh.export(tmp_file.name, file_type="glb", include_normals=True)
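    # Return the path to the temporary .glb; the LitModel3D viewer loads it directly.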
    return tmp_file.name

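# @spaces.GPU requests ZeroGPU hardware for the duration of the call when the
# app runs on Hugging Face Spaces; both the Flux image and the SF3D mesh are
# produced within that single allocation.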
@spaces.GPU
def process_and_generate(prompt, height, width, steps, scales, seed):
    # Generate image from prompt
    generated_image = generate_image(prompt, height, width, steps, scales, seed)
    
    # Generate 3D model from the image
    glb_file = generate_3d_model(generated_image)
    
    return generated_image, glb_file

# Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Text-to-3D Model Generator")
    
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Your Image Description", lines=3)
            with gr.Accordion("Advanced Settings", open=False):
                height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
                width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
                steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
                scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
                seed = gr.Number(label="Seed", value=3413, precision=0)
            
            generate_btn = gr.Button("Generate 3D Model", variant="primary")

        with gr.Column(scale=4):
            output_image = gr.Image(label="Generated Image")
            output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])

    generate_btn.click(
        process_and_generate,
        inputs=[prompt, height, width, steps, scales, seed],
        outputs=[output_image, output_3d]
    )

if __name__ == "__main__":
    demo.launch()