gokaygokay committed
Commit fa09fe7 · verified · 1 Parent(s): 3d535fa

Update app.py

Files changed (1)
  1. app.py +130 -130
app.py CHANGED
@@ -1,131 +1,131 @@
- import os
- import tempfile
- import time
- import numpy as np
- import gradio as gr
- import torch
- from PIL import Image
- from diffusers import FluxPipeline
- from huggingface_hub import hf_hub_download
- from sf3d.system import SF3D
- import sf3d.utils as sf3d_utils
- from gradio_litmodel3d import LitModel3D
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- dtype = torch.bfloat16
-
- torch.backends.cuda.matmul.allow_tf32 = True
- huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
- # Set up environment and cache
- cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
- os.environ["TRANSFORMERS_CACHE"] = cache_path
- os.environ["HF_HUB_CACHE"] = cache_path
- os.environ["HF_HOME"] = cache_path
-
- if not os.path.exists(cache_path):
-     os.makedirs(cache_path, exist_ok=True)
-
- # Initialize Flux pipeline
- pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16, token=huggingface_token)
- pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
- pipe.fuse_lora(lora_scale=0.125)
- pipe.to(device="cuda", dtype=torch.bfloat16)
-
- # Initialize SF3D model
- sf3d_model = SF3D.from_pretrained(
-     "stabilityai/stable-fast-3d",
-     config_name="config.yaml",
-     weight_name="model.safetensors",
-     token=huggingface_token,
- )
- sf3d_model.eval().cuda()
-
- # Constants for SF3D
- COND_WIDTH, COND_HEIGHT = 512, 512
- COND_DISTANCE, COND_FOVY_DEG = 1.6, 40
- BACKGROUND_COLOR = [0.5, 0.5, 0.5]
-
- c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
- intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
-     COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
- )
-
- def generate_image(prompt, height, width, steps, scales, seed):
-     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
-         return pipe(
-             prompt=[prompt],
-             generator=torch.Generator().manual_seed(int(seed)),
-             num_inference_steps=int(steps),
-             guidance_scale=float(scales),
-             height=int(height),
-             width=int(width),
-             max_sequence_length=256
-         ).images[0]
-
- def create_batch(input_image: Image.Image) -> dict:
-     img_cond = torch.from_numpy(
-         np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32) / 255.0
-     ).float().clip(0, 1)
-     mask_cond = img_cond[:, :, -1:]
-     rgb_cond = torch.lerp(
-         torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
-     )
-
-     batch_elem = {
-         "rgb_cond": rgb_cond,
-         "mask_cond": mask_cond,
-         "c2w_cond": c2w_cond.unsqueeze(0),
-         "intrinsic_cond": intrinsic.unsqueeze(0),
-         "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
-     }
-     return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
-
- def generate_3d_model(input_image):
-     with torch.no_grad():
-         with torch.autocast(device_type="cuda", dtype=torch.float16):
-             model_batch = create_batch(input_image)
-             model_batch = {k: v.cuda() for k, v in model_batch.items()}
-             trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
-             trimesh_mesh = trimesh_mesh[0]
-
-     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".glb")
-     trimesh_mesh.export(tmp_file.name, file_type="glb", include_normals=True)
-     return tmp_file.name
-
- def process_and_generate(prompt, height, width, steps, scales, seed):
-     # Generate image from prompt
-     generated_image = generate_image(prompt, height, width, steps, scales, seed)
-
-     # Generate 3D model from the image
-     glb_file = generate_3d_model(generated_image)
-
-     return generated_image, glb_file
-
- # Gradio interface
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# Text-to-3D Model Generator")
-
-     with gr.Row():
-         with gr.Column(scale=3):
-             prompt = gr.Textbox(label="Your Image Description", lines=3)
-             with gr.Accordion("Advanced Settings", open=False):
-                 height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
-                 width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
-                 steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
-                 scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
-                 seed = gr.Number(label="Seed", value=3413, precision=0)
-
-             generate_btn = gr.Button("Generate 3D Model", variant="primary")
-
-         with gr.Column(scale=4):
-             output_image = gr.Image(label="Generated Image")
-             output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])
-
-     generate_btn.click(
-         process_and_generate,
-         inputs=[prompt, height, width, steps, scales, seed],
-         outputs=[output_image, output_3d]
-     )
-
- if __name__ == "__main__":
+ import os
+ import tempfile
+ import time
+ import numpy as np
+ import gradio as gr
+ import torch
+ from PIL import Image
+ from diffusers import DiffusionPipeline
+ from huggingface_hub import hf_hub_download
+ from sf3d.system import SF3D
+ import sf3d.utils as sf3d_utils
+ from gradio_litmodel3d import LitModel3D
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ dtype = torch.bfloat16
+
+ torch.backends.cuda.matmul.allow_tf32 = True
+ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+ # Set up environment and cache
+ cache_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
+ os.environ["TRANSFORMERS_CACHE"] = cache_path
+ os.environ["HF_HUB_CACHE"] = cache_path
+ os.environ["HF_HOME"] = cache_path
+
+ if not os.path.exists(cache_path):
+     os.makedirs(cache_path, exist_ok=True)
+
+ # Initialize Flux pipeline
+ pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=dtype, token=huggingface_token).to(device)
+ pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"))
+ pipe.fuse_lora(lora_scale=0.125)
+ pipe.to(device="cuda", dtype=torch.bfloat16)
+
+ # Initialize SF3D model
+ sf3d_model = SF3D.from_pretrained(
+     "stabilityai/stable-fast-3d",
+     config_name="config.yaml",
+     weight_name="model.safetensors",
+     token=huggingface_token,
+ )
+ sf3d_model.eval().cuda()
+
+ # Constants for SF3D
+ COND_WIDTH, COND_HEIGHT = 512, 512
+ COND_DISTANCE, COND_FOVY_DEG = 1.6, 40
+ BACKGROUND_COLOR = [0.5, 0.5, 0.5]
+
+ c2w_cond = sf3d_utils.default_cond_c2w(COND_DISTANCE)
+ intrinsic, intrinsic_normed_cond = sf3d_utils.create_intrinsic_from_fov_deg(
+     COND_FOVY_DEG, COND_HEIGHT, COND_WIDTH
+ )
+
+ def generate_image(prompt, height, width, steps, scales, seed):
+     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+         return pipe(
+             prompt=[prompt],
+             generator=torch.Generator().manual_seed(int(seed)),
+             num_inference_steps=int(steps),
+             guidance_scale=float(scales),
+             height=int(height),
+             width=int(width),
+             max_sequence_length=256
+         ).images[0]
+
+ def create_batch(input_image: Image.Image) -> dict:
+     img_cond = torch.from_numpy(
+         np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32) / 255.0
+     ).float().clip(0, 1)
+     mask_cond = img_cond[:, :, -1:]
+     rgb_cond = torch.lerp(
+         torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
+     )
+
+     batch_elem = {
+         "rgb_cond": rgb_cond,
+         "mask_cond": mask_cond,
+         "c2w_cond": c2w_cond.unsqueeze(0),
+         "intrinsic_cond": intrinsic.unsqueeze(0),
+         "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
+     }
+     return {k: v.unsqueeze(0) for k, v in batch_elem.items()}
+
+ def generate_3d_model(input_image):
+     with torch.no_grad():
+         with torch.autocast(device_type="cuda", dtype=torch.float16):
+             model_batch = create_batch(input_image)
+             model_batch = {k: v.cuda() for k, v in model_batch.items()}
+             trimesh_mesh, _ = sf3d_model.generate_mesh(model_batch, 1024)
+             trimesh_mesh = trimesh_mesh[0]
+
+     tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".glb")
+     trimesh_mesh.export(tmp_file.name, file_type="glb", include_normals=True)
+     return tmp_file.name
+
+ def process_and_generate(prompt, height, width, steps, scales, seed):
+     # Generate image from prompt
+     generated_image = generate_image(prompt, height, width, steps, scales, seed)
+
+     # Generate 3D model from the image
+     glb_file = generate_3d_model(generated_image)
+
+     return generated_image, glb_file
+
+ # Gradio interface
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Text-to-3D Model Generator")
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             prompt = gr.Textbox(label="Your Image Description", lines=3)
+             with gr.Accordion("Advanced Settings", open=False):
+                 height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
+                 width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
+                 steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
+                 scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
+                 seed = gr.Number(label="Seed", value=3413, precision=0)
+
+             generate_btn = gr.Button("Generate 3D Model", variant="primary")
+
+         with gr.Column(scale=4):
+             output_image = gr.Image(label="Generated Image")
+             output_3d = LitModel3D(label="3D Model", clear_color=[0.0, 0.0, 0.0, 0.0])
+
+     generate_btn.click(
+         process_and_generate,
+         inputs=[prompt, height, width, steps, scales, seed],
+         outputs=[output_image, output_3d]
+     )
+
+ if __name__ == "__main__":
      demo.launch()
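
For local testing, a minimal smoke test of the app above might look like the sketch below. It assumes the file is saved as app.py next to the test script, that a CUDA GPU is available, and that HUGGINGFACE_TOKEN is exported before the import; the prompt text and output filename are illustrative placeholders, not part of the commit.

# Hypothetical smoke test (not part of the commit). Importing app runs the
# module-level setup: it downloads FLUX.1-dev and stable-fast-3d weights and
# moves both models to CUDA, just as the Space does on startup.
from app import process_and_generate

image, glb_path = process_and_generate(
    prompt="a carved wooden chess piece",  # placeholder prompt
    height=1024,
    width=1024,
    steps=8,
    scales=3.5,
    seed=3413,
)
image.save("preview.png")                # PIL image returned by the Flux pipeline
print("GLB mesh written to:", glb_path)  # temporary .glb exported by SF3D

Because demo.launch() is guarded by the __main__ check, importing the module runs only the model setup, not the Gradio server.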