Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,121 +1,121 @@
 import spaces
 from spaces.zero.decorator import GPU
 
-import os
-import tyro
-import imageio
 import numpy as np
 import tqdm
 import torch
 import torch.nn.functional as F
 from safetensors.torch import load_file
 import rembg
 import gradio as gr
-
 import kiui
 from kiui.op import recenter
 from kiui.cam import orbit_camera
-from core.utils import get_rays
 from core.options import AllConfigs, Options
-from core.models import LTRFM_Mesh,
 from core.instant_utils.mesh_util import save_obj, save_obj_with_mtl
 from mvdream.pipeline_mvdream import MVDreamPipeline
 from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
 from huggingface_hub import hf_hub_download
 
 
 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
-IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
-GRADIO_VIDEO_PATH = 'gradio_output.mp4'
-GRADIO_OBJ_PATH = 'gradio_output_rgb.obj'
 GRADIO_OBJ_ALBEDO_PATH = 'gradio_output_albedo.obj'
 GRADIO_OBJ_SHADING_PATH = 'gradio_output_shading.obj'
 
 ckpt_path = hf_hub_download(repo_id="rgxie/LDM", filename="LDM_6V_SDF.ckpt")
 
 opt = Options(
-    input_size=512,
     down_channels=(32, 64, 128, 256, 512),
     down_attention=(False, False, False, False, True),
     up_channels=(512, 256, 128),
     up_attention=(True, False, False, False),
     volume_mode='TRF_NeRF',
     splat_size=64,
-    output_size=62,
     data_mode='s5',
     num_views=8,
-    gradient_accumulation_steps=1,
     mixed_precision='bf16',
     resume=ckpt_path,
 )
 
-
 if opt.volume_mode == 'TRF_Mesh':
     model = LTRFM_Mesh(opt)
 elif opt.volume_mode == 'TRF_NeRF':
     model = LTRFM_NeRF(opt)
 else:
-    model =
 
-#
-if opt.resume:
     if opt.resume.endswith('safetensors'):
         ckpt = load_file(opt.resume, device='cpu')
-    else:
         ckpt_dict = torch.load(opt.resume, map_location='cpu')
-        ckpt
 
     state_dict = model.state_dict()
     for k, v in ckpt.items():
-        k
-        if k in state_dict:
             if state_dict[k].shape == v.shape:
                 state_dict[k].copy_(v)
             else:
                 print(f'[WARN] mismatching shape for param {k}: ckpt {v.shape} != model {state_dict[k].shape}, ignored.')
         else:
             print(f'[WARN] unexpected param {k}: {v.shape}')
-print('[INFO] load resume success!')
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.half().to(device)
-model.eval()
-
-tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
-proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
-proj_matrix[0, 0] = 1 / tan_half_fov
-proj_matrix[1, 1] = 1 / tan_half_fov
-proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[2, 3] = 1
 
-#
 pipe_text = MVDreamPipeline.from_pretrained(
-    'ashawkey/mvdream-sd2.1-diffusers',
     torch_dtype=torch.float16,
     trust_remote_code=True,
 )
 pipe_text = pipe_text.to(device)
 
 pipe_image = MVDreamPipeline.from_pretrained(
-    "ashawkey/imagedream-ipmv-diffusers",
     torch_dtype=torch.float16,
     trust_remote_code=True,
 )
 pipe_image = pipe_image.to(device)
 
 pipe_image_plus = DiffusionPipeline.from_pretrained(
-    "sudo-ai/zero123plus-v1.2",
     custom_pipeline="zero123plus",
     torch_dtype=torch.float16,
     trust_remote_code=True,
 )
 pipe_image_plus.scheduler = EulerAncestralDiscreteScheduler.from_config(
     pipe_image_plus.scheduler.config, timestep_spacing='trailing'
 )
 
-unet_path
 
 if os.path.exists(unet_path):
     unet_ckpt_path = unet_path
 else:
@@ -125,140 +125,329 @@ state_dict = torch.load(unet_ckpt_path, map_location='cpu')
 pipe_image_plus.unet.load_state_dict(state_dict, strict=True)
 pipe_image_plus = pipe_image_plus.to(device)
 
-#
 bg_remover = rembg.new_session()
 
 @spaces.GPU
 def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42, mv_moedl_option=None):
     kiui.seed_everything(input_seed)
-    os.makedirs(os.path.join(opt.workspace, "gradio"), exist_ok=True)
 
     if condition_input_image is None:
         mv_image_uint8 = pipe_text(prompt, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=7.5, elevation=input_elevation)
         mv_image_uint8 = (mv_image_uint8 * 255).astype(np.uint8)
-
         mv_image = []
         for i in range(4):
-            image = rembg.remove(mv_image_uint8[i], session=bg_remover)
             image = image.astype(np.float32) / 255
             image = recenter(image, image[..., 0] > 0, border_ratio=0.2)
             image = image[..., :3] * image[..., -1:] + (1 - image[..., -1:])
             mv_image.append(image)
-
-        mv_image_grid = np.concatenate([mv_image[1], mv_image[2],
         input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
-
-
     else:
-        condition_input_image = np.array(condition_input_image)
-
         mask = carved_image[..., -1] > 0
         image = recenter(carved_image, mask, border_ratio=0.2)
         image = image.astype(np.float32) / 255.0
         processed_image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
-
-        if mv_moedl_option
-            mv_image = pipe_image(prompt, processed_image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0,
-
             input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
 
         else:
             from PIL import Image
-            from einops import rearrange
-
             processed_image = Image.fromarray((processed_image * 255).astype(np.uint8))
             mv_image = pipe_image_plus(processed_image, num_inference_steps=input_num_steps).images[0]
             mv_image = np.asarray(mv_image, dtype=np.float32) / 255.0
-            mv_image = torch.from_numpy(mv_image).permute(2, 0, 1).contiguous().float()
             mv_image_grid = rearrange(mv_image, 'c (n h) (m w) -> (m h) (n w) c', n=3, m=2).numpy()
             mv_image = rearrange(mv_image, 'c (n h) (m w) -> (n m) h w c', n=3, m=2).numpy()
             input_image = mv_image
 
-    return mv_image_grid, processed_image, input_image
 
 @spaces.GPU
 def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_seed=42):
     kiui.seed_everything(input_seed)
 
-    input_rays_o, input_rays_d = get_rays(opt, proj_matrix, device, 'center')
 
-    with torch.no_grad():
-        preds = model(
-            cond_img=input_image,
-            rays=(input_rays_o, input_rays_d)
-        )
-
-    pred_rgb = preds[0].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-    pred_albedo = preds[1].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-    pred_shading = preds[2].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-
-    save_obj(output_obj_rgb_path, pred_rgb)
-    save_obj_with_mtl(output_obj_albedo_path, pred_albedo, mode="albedo")
-    save_obj_with_mtl(output_obj_shading_path, pred_shading, mode="shading")
-
-    camera_positions = orbit_camera(type="spherical", radius=2.5, h=3, w=2)
-    output_frames = []
-    for pose in tqdm.tqdm(camera_positions, ncols=0):
-        with torch.no_grad():
-            preds = model(cond_img=input_image, rays=get_rays(opt, proj_matrix, device, pose))
-        pred_rgb = preds[0].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-        output_frames.append(pred_rgb)
-    output_frames = np.stack(output_frames, axis=0)
-
-    imageio.mimwrite(output_video_path, output_frames, fps=24, quality=8)
-    return output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path, output_video_path
-
-def update_mv_model(mv_moedl_option):
-    if mv_moedl_option == 'mvdream':
-        return gr.update(visible=False)
-    else:
-        return gr.update(visible=True)
 
-# Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown(
-        "## Generate 3D object from text or image prompt"
-    )
     with gr.Row():
-        with gr.Column():
@@ -1,121 +1,121 @@
 import spaces
 from spaces.zero.decorator import GPU
 
 import numpy as np
 import tqdm
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
+import torchvision.transforms.functional as TF
 from safetensors.torch import load_file
 import rembg
 import gradio as gr
 import kiui
 from kiui.op import recenter
 from kiui.cam import orbit_camera
+from core.utils import get_rays, grid_distortion, orbit_camera_jitter
+
 from core.options import AllConfigs, Options
+from core.models import LTRFM_Mesh,LTRFM_NeRF
 from core.instant_utils.mesh_util import save_obj, save_obj_with_mtl
 from mvdream.pipeline_mvdream import MVDreamPipeline
 from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
 from huggingface_hub import hf_hub_download
 
+import spaces
 
 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
 GRADIO_OBJ_ALBEDO_PATH = 'gradio_output_albedo.obj'
 GRADIO_OBJ_SHADING_PATH = 'gradio_output_shading.obj'
 
+#opt = tyro.cli(AllConfigs)
+
 ckpt_path = hf_hub_download(repo_id="rgxie/LDM", filename="LDM_6V_SDF.ckpt")
 
 opt = Options(
+    input_size=512,
     down_channels=(32, 64, 128, 256, 512),
     down_attention=(False, False, False, False, True),
     up_channels=(512, 256, 128),
     up_attention=(True, False, False, False),
     volume_mode='TRF_NeRF',
     splat_size=64,
+    output_size=62, #crop patch
     data_mode='s5',
     num_views=8,
+    gradient_accumulation_steps=1, #2
     mixed_precision='bf16',
     resume=ckpt_path,
 )
 
+
+# model
 if opt.volume_mode == 'TRF_Mesh':
     model = LTRFM_Mesh(opt)
 elif opt.volume_mode == 'TRF_NeRF':
     model = LTRFM_NeRF(opt)
 else:
+    model = LGM(opt)
 
+# resume pretrained checkpoint
+if opt.resume is not None:
     if opt.resume.endswith('safetensors'):
         ckpt = load_file(opt.resume, device='cpu')
+    else: #ckpt
         ckpt_dict = torch.load(opt.resume, map_location='cpu')
+        ckpt=ckpt_dict["model"]
 
     state_dict = model.state_dict()
     for k, v in ckpt.items():
+        k=k.replace('module.', '')
+        if k in state_dict:
             if state_dict[k].shape == v.shape:
                 state_dict[k].copy_(v)
             else:
                 print(f'[WARN] mismatching shape for param {k}: ckpt {v.shape} != model {state_dict[k].shape}, ignored.')
         else:
             print(f'[WARN] unexpected param {k}: {v.shape}')
+print(f'[INFO] load resume success!')
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.half().to(device)
 proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[2, 3] = 1
 
+# load dreams
 pipe_text = MVDreamPipeline.from_pretrained(
+    'ashawkey/mvdream-sd2.1-diffusers', # remote weights
     torch_dtype=torch.float16,
     trust_remote_code=True,
+    # local_files_only=True,
 )
 pipe_text = pipe_text.to(device)
 
+# mvdream
 pipe_image = MVDreamPipeline.from_pretrained(
+    "ashawkey/imagedream-ipmv-diffusers", # remote weights
     torch_dtype=torch.float16,
     trust_remote_code=True,
+    # local_files_only=True,
 )
 pipe_image = pipe_image.to(device)
 
+
+print('Loading 123plus model ...')
 pipe_image_plus = DiffusionPipeline.from_pretrained(
+    "sudo-ai/zero123plus-v1.2",
     custom_pipeline="zero123plus",
     torch_dtype=torch.float16,
     trust_remote_code=True,
+    #local_files_only=True,
 )
 pipe_image_plus.scheduler = EulerAncestralDiscreteScheduler.from_config(
     pipe_image_plus.scheduler.config, timestep_spacing='trailing'
 )
 
+unet_path='./pretrained/diffusion_pytorch_model.bin'
 
+print('Loading custom white-background unet ...')
 if os.path.exists(unet_path):
     unet_ckpt_path = unet_path
 else:
@@ -125,140 +125,329 @@ state_dict = torch.load(unet_ckpt_path, map_location='cpu')
 pipe_image_plus.unet.load_state_dict(state_dict, strict=True)
 pipe_image_plus = pipe_image_plus.to(device)
 
+# load rembg
 bg_remover = rembg.new_session()
 
+
 @spaces.GPU
 def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42, mv_moedl_option=None):
+    # seed
     kiui.seed_everything(input_seed)
 
+    os.makedirs(os.path.join(opt.workspace, "gradio"), exist_ok=True)
+
+    # text-conditioned
     if condition_input_image is None:
         mv_image_uint8 = pipe_text(prompt, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=7.5, elevation=input_elevation)
         mv_image_uint8 = (mv_image_uint8 * 255).astype(np.uint8)
+        # bg removal
         mv_image = []
         for i in range(4):
+            image = rembg.remove(mv_image_uint8[i], session=bg_remover) # [H, W, 4]
+            # to white bg
             image = image.astype(np.float32) / 255
             image = recenter(image, image[..., 0] > 0, border_ratio=0.2)
             image = image[..., :3] * image[..., -1:] + (1 - image[..., -1:])
             mv_image.append(image)
+
+        mv_image_grid = np.concatenate([mv_image[1], mv_image[2],mv_image[3], mv_image[0]],axis=1)
         input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
+
+        processed_image=None
+    # image-conditioned (may also input text, but no text usually works too)
     else:
+        condition_input_image = np.array(condition_input_image) # uint8
+        # bg removal
+        carved_image = rembg.remove(condition_input_image, session=bg_remover) # [H, W, 4]
         mask = carved_image[..., -1] > 0
         image = recenter(carved_image, mask, border_ratio=0.2)
         image = image.astype(np.float32) / 255.0
         processed_image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
+
+        if mv_moedl_option=='mvdream':
+            mv_image = pipe_image(prompt, processed_image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
+
+            mv_image_grid = np.concatenate([mv_image[1], mv_image[2],mv_image[3], mv_image[0]],axis=1)
             input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
 
         else:
             from PIL import Image
+            from einops import rearrange, repeat
+
+            # input_image=input_image* 255
             processed_image = Image.fromarray((processed_image * 255).astype(np.uint8))
             mv_image = pipe_image_plus(processed_image, num_inference_steps=input_num_steps).images[0]
             mv_image = np.asarray(mv_image, dtype=np.float32) / 255.0
+            mv_image = torch.from_numpy(mv_image).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
             mv_image_grid = rearrange(mv_image, 'c (n h) (m w) -> (m h) (n w) c', n=3, m=2).numpy()
             mv_image = rearrange(mv_image, 'c (n h) (m w) -> (n m) h w c', n=3, m=2).numpy()
             input_image = mv_image
+    return mv_image_grid, processed_image, input_image
 
 
 @spaces.GPU
 def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_seed=42):
     kiui.seed_everything(input_seed)
+
+    output_obj_rgb_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_PATH)
+    output_obj_albedo_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_ALBEDO_PATH)
+    output_obj_shading_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_SHADING_PATH)
+
+    output_video_path = os.path.join(opt.workspace,"gradio", GRADIO_VIDEO_PATH)
+    # generate gaussians
+    # [4, 256, 256, 3], float32
+    input_image = torch.from_numpy(input_image).permute(0, 3, 1, 2).float().to(device) # [4, 3, 256, 256]
+    input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
 
+    images_input_vit = F.interpolate(input_image, size=(224, 224), mode='bilinear', align_corners=False)
+
+    data = {}
+    input_image = input_image.unsqueeze(0) # [1, 4, 9, H, W]
+    images_input_vit=images_input_vit.unsqueeze(0)
+    data['input_vit']=images_input_vit
+
+    elevation = 0
+    cam_poses =[]
+    if mv_moedl_option=='mvdream' or condition_input_image is None:
+        azimuth = np.arange(0, 360, 90, dtype=np.int32)
+        for azi in tqdm.tqdm(azimuth):
+            cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
+            cam_poses.append(cam_pose)
+    else:
+        azimuth = np.arange(30, 360, 60, dtype=np.int32)
+        cnt = 0
+        for azi in tqdm.tqdm(azimuth):
+            if (cnt+1) % 2!= 0:
+                elevation=-20
+            else:
+                elevation=30
+            cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
+            cam_poses.append(cam_pose)
+            cnt=cnt+1
+
+    cam_poses = torch.cat(cam_poses,0)
+    radius = torch.norm(cam_poses[0, :3, 3])
+    cam_poses[:, :3, 3] *= opt.cam_radius / radius
+    transform = torch.tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, opt.cam_radius], [0, 0, 0, 1]], dtype=torch.float32).to(device) @ torch.inverse(cam_poses[0])
+    cam_poses = transform.unsqueeze(0) @ cam_poses
+
+    cam_poses=cam_poses.unsqueeze(0)
+    data['source_camera']=cam_poses
+
+    with torch.no_grad():
+        if opt.volume_mode == 'TRF_Mesh':
+            with torch.autocast(device_type='cuda', dtype=torch.float32):
+                svd_volume = model.forward_svd_volume(input_image,data)
+        else:
+            with torch.autocast(device_type='cuda', dtype=torch.float16):
+                svd_volume = model.forward_svd_volume(input_image,data)
+
+    #time-consuming
+    export_texmap=False
+
+    mesh_out = model.extract_mesh(svd_volume,use_texture_map=export_texmap)
+
+    if export_texmap:
+        vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
+
+        for i in range(len(tex_map)):
+            mesh_path=os.path.join(opt.workspace, name + str(i) + '_'+ str(seed)+ '.obj')
+            save_obj_with_mtl(
+                vertices.data.cpu().numpy(),
+                uvs.data.cpu().numpy(),
+                faces.data.cpu().numpy(),
+                mesh_tex_idx.data.cpu().numpy(),
+                tex_map[i].permute(1, 2, 0).data.cpu().numpy(),
+                mesh_path,
+            )
+    else:
+        vertices, faces, vertex_colors = mesh_out
 
+        save_obj(vertices, faces, vertex_colors[0], output_obj_rgb_path)
+        save_obj(vertices, faces, vertex_colors[1], output_obj_albedo_path)
+        save_obj(vertices, faces, vertex_colors[2], output_obj_shading_path)
+
+    # images=[]
+    # azimuth = np.arange(0, 360, 6, dtype=np.int32)
+    # for azi in tqdm.tqdm(azimuth):
+
+    #     cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True))
+
+    #     if opt.volume_mode == 'TRF_Mesh':
+    #         cam_view = torch.inverse(cam_pose)
+    #         cam_view=cam_view.unsqueeze(0).unsqueeze(0).to(device)
+    #         data['w2c'] = cam_view
+    #         with torch.autocast(device_type='cuda', dtype=torch.float32):
+    #             render_images=model.render_frame(data)
+    #     else:
+    #         rays_o, rays_d = get_rays(cam_pose, opt.infer_render_size, opt.infer_render_size, opt.fovy) # [h, w, 3]
+    #         rays_o=rays_o.unsqueeze(0).unsqueeze(0).to(device)# B,V,H,W,3
+    #         rays_d=rays_d.unsqueeze(0).unsqueeze(0).to(device)
+    #         data['all_rays_o']=rays_o
+    #         data['all_rays_d']=rays_d
+    #         with torch.autocast(device_type='cuda', dtype=torch.float16):
+    #             render_images=model.render_frame(data)
+    #     image=render_images['images_pred']
+
+    #     images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
+
+    # images = np.concatenate(images, axis=0)
+    # imageio.mimwrite(output_video_path, images, fps=30)
+
+
+    return output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path #, output_video_path
+
+
+# gradio UI
+
+_TITLE = '''LDM: Large Tensorial SDF Model for Textured Mesh Generation'''
+
+_DESCRIPTION = '''
+
+
+
+* Input can be a text prompt or an image.
+* The supported multi-view diffusion models include the image-conditioned ImageDream and Zero123plus, as well as the text-conditioned MVDream.
+* If you find the output unsatisfying, try a different multi-view diffusion model or seed!
+'''
+block = gr.Blocks(title=_TITLE).queue()
+with block:
 
     with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown('# ' + _TITLE)
+            gr.Markdown(_DESCRIPTION)
+
+    with gr.Row(variant='panel'):
+        with gr.Column(scale=1):
+            with gr.Tab("Image-to-3D"):
+                # input image
+                with gr.Row():
+                    condition_input_image = gr.Image(
+                        label="Input Image",
+                        image_mode="RGBA",
+                        type="pil"
+                    )
+
+                    processed_image = gr.Image(
+                        label="Processed Image",
+                        image_mode="RGBA",
+                        type="pil",
+                        interactive=False
+                    )
+
+                with gr.Row():
+                    mv_moedl_option = gr.Radio([
+                        "zero123plus",
+                        "mvdream"
+                    ], value="zero123plus",
+                        label="Multi-view Diffusion")
+
+                with gr.Row(variant="panel"):
+                    gr.Examples(
+                        examples=[
+                            os.path.join("example", img_name) for img_name in sorted(os.listdir("example"))
+                        ],
+                        inputs=[condition_input_image],
+                        fn=lambda x: process(condition_input_image=x, prompt=''),
+                        cache_examples=False,
+                        examples_per_page=20,
+                        label='Image-to-3D Examples'
+                    )
+
+            with gr.Tab("Text-to-3D"):
+                # input prompt
+                with gr.Row():
+                    input_text = gr.Textbox(label="prompt")
+                # negative prompt
+                with gr.Row():
+                    input_neg_text = gr.Textbox(label="negative prompt", value='ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate')
+
+                with gr.Row(variant="panel"):
+                    gr.Examples(
+                        examples=[
+                            "a hamburger",
+                            "a furry red fox head",
+                            "a teddy bear",
+                            "a motorbike",
+                        ],
+                        inputs=[input_text],
+                        fn=lambda x: process(condition_input_image=None, prompt=x),
+                        cache_examples=False,
+                        label='Text-to-3D Examples'
+                    )
+
+            # elevation
+            input_elevation = gr.Slider(label="elevation", minimum=-90, maximum=90, step=1, value=0)
+            # inference steps
+            input_num_steps = gr.Slider(label="inference steps", minimum=1, maximum=100, step=1, value=30)
+            # random seed
+            input_seed = gr.Slider(label="random seed", minimum=0, maximum=100000, step=1, value=0)
+            # gen button
+            button_gen = gr.Button("Generate")
+
+
+        with gr.Column(scale=1):
+            with gr.Row():
+                # multi-view results
+                mv_image_grid = gr.Image(interactive=False, show_label=False)
+            # with gr.Row():
+            #     output_video_path = gr.Video(label="video")
+            with gr.Row():
+                output_obj_rgb_path = gr.Model3D(
+                    label="RGB Model (OBJ Format)",
+                    interactive=False,
+                )
+            with gr.Row():
+                output_obj_albedo_path = gr.Model3D(
+                    label="Albedo Model (OBJ Format)",
+                    interactive=False,
+                )
+            with gr.Row():
+                output_obj_shading_path = gr.Model3D(
+                    label="Shading Model (OBJ Format)",
+                    interactive=False,
+                )
+
+
+    input_image = gr.State()
+    button_gen.click(fn=generate_mv, inputs=[condition_input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed, mv_moedl_option],
+                     outputs=[mv_image_grid, processed_image, input_image],).success(
+        fn=generate_3d,
+        inputs=[input_image, condition_input_image, mv_moedl_option, input_seed],
+        outputs=[output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path] , #output_video_path
+    )
+
+
+block.launch(server_name="0.0.0.0", share=False)
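
Note on the updated file: a few names that this commit removes near the top are still referenced further down in the new revision (`os` in the path handling, `GRADIO_OBJ_PATH` and `GRADIO_VIDEO_PATH` inside `generate_3d`, and `proj_matrix`, which lines 82 and 83 of the new file still write into). Below is a minimal sketch of the definitions that would need to remain for those references to resolve; it simply restores the removed lines, assumes the surrounding app.py context (`np`, `torch`, `opt`, `device`) is already defined, and is an illustration rather than part of the commit.

# Sketch only: definitions the new revision still appears to depend on,
# copied from the lines removed above; not part of this commit.
import os                                   # os.path.join / os.makedirs / os.listdir are still used

GRADIO_VIDEO_PATH = 'gradio_output.mp4'     # referenced when building output_video_path
GRADIO_OBJ_PATH = 'gradio_output_rgb.obj'   # referenced when building output_obj_rgb_path

# proj_matrix is still assigned to in the new file, so the projection setup
# from the old revision would also need to stay:
tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
proj_matrix[0, 0] = 1 / tan_half_fov
proj_matrix[1, 1] = 1 / tan_half_fov
proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)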