cavargas10 committed
Commit 90df5e0 · verified · 1 Parent(s): b3cdc8c

Update app.py

Files changed (1)
  1. app.py +317 -128
app.py CHANGED
@@ -1,121 +1,121 @@
import spaces
from spaces.zero.decorator import GPU

- import os
- import tyro
- import imageio
import numpy as np
import tqdm
import torch
+ import torch.nn as nn
import torch.nn.functional as F
+ import torchvision.transforms.functional as TF
from safetensors.torch import load_file
import rembg
import gradio as gr
-
import kiui
from kiui.op import recenter
from kiui.cam import orbit_camera
- from core.utils import get_rays
+ from core.utils import get_rays, grid_distortion, orbit_camera_jitter
+
from core.options import AllConfigs, Options
- from core.models import LTRFM_Mesh, LTRFM_NeRF
+ from core.models import LTRFM_Mesh,LTRFM_NeRF
from core.instant_utils.mesh_util import save_obj, save_obj_with_mtl
from mvdream.pipeline_mvdream import MVDreamPipeline
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
from huggingface_hub import hf_hub_download

+ import spaces

IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
- IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
- GRADIO_VIDEO_PATH = 'gradio_output.mp4'
- GRADIO_OBJ_PATH = 'gradio_output_rgb.obj'
GRADIO_OBJ_ALBEDO_PATH = 'gradio_output_albedo.obj'
GRADIO_OBJ_SHADING_PATH = 'gradio_output_shading.obj'

+ #opt = tyro.cli(AllConfigs)
+
ckpt_path = hf_hub_download(repo_id="rgxie/LDM", filename="LDM_6V_SDF.ckpt")

opt = Options(
-     input_size=512,
+     input_size=512,
    down_channels=(32, 64, 128, 256, 512),
    down_attention=(False, False, False, False, True),
    up_channels=(512, 256, 128),
    up_attention=(True, False, False, False),
    volume_mode='TRF_NeRF',
    splat_size=64,
-     output_size=62, # crop patch
+     output_size=62, #crop patch
    data_mode='s5',
    num_views=8,
-     gradient_accumulation_steps=1,
+     gradient_accumulation_steps=1, #2
    mixed_precision='bf16',
    resume=ckpt_path,
)

- # Model selection
+
+ # model
if opt.volume_mode == 'TRF_Mesh':
    model = LTRFM_Mesh(opt)
elif opt.volume_mode == 'TRF_NeRF':
    model = LTRFM_NeRF(opt)
else:
-     model = None
+     model = LGM(opt)

- # Resume pretrained checkpoint
- if opt.resume:
+ # resume pretrained checkpoint
+ if opt.resume is not None:
    if opt.resume.endswith('safetensors'):
        ckpt = load_file(opt.resume, device='cpu')
-     else:
+     else: #ckpt
        ckpt_dict = torch.load(opt.resume, map_location='cpu')
-         ckpt = ckpt_dict["model"]
+         ckpt=ckpt_dict["model"]

    state_dict = model.state_dict()
    for k, v in ckpt.items():
-         k = k.replace('module.', '')
-         if k in state_dict:
+         k=k.replace('module.', '')
+         if k in state_dict:
            if state_dict[k].shape == v.shape:
                state_dict[k].copy_(v)
            else:
                print(f'[WARN] mismatching shape for param {k}: ckpt {v.shape} != model {state_dict[k].shape}, ignored.')
        else:
            print(f'[WARN] unexpected param {k}: {v.shape}')
-     print('[INFO] load resume success!')
+     print(f'[INFO] load resume success!')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.half().to(device)
- model.eval()
-
- tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
- proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
- proj_matrix[0, 0] = 1 / tan_half_fov
- proj_matrix[1, 1] = 1 / tan_half_fov
- proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
proj_matrix[2, 3] = 1

- # Load dreams
+ # load dreams
pipe_text = MVDreamPipeline.from_pretrained(
-     'ashawkey/mvdream-sd2.1-diffusers',
+     'ashawkey/mvdream-sd2.1-diffusers', # remote weights
    torch_dtype=torch.float16,
    trust_remote_code=True,
+     # local_files_only=True,
)
pipe_text = pipe_text.to(device)

+ # mvdream
pipe_image = MVDreamPipeline.from_pretrained(
-     "ashawkey/imagedream-ipmv-diffusers",
+     "ashawkey/imagedream-ipmv-diffusers", # remote weights
    torch_dtype=torch.float16,
    trust_remote_code=True,
+     # local_files_only=True,
)
pipe_image = pipe_image.to(device)

+
+ print('Loading 123plus model ...')
pipe_image_plus = DiffusionPipeline.from_pretrained(
-     "sudo-ai/zero123plus-v1.2",
+     "sudo-ai/zero123plus-v1.2",
    custom_pipeline="zero123plus",
    torch_dtype=torch.float16,
    trust_remote_code=True,
+     #local_files_only=True,
)
pipe_image_plus.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipe_image_plus.scheduler.config, timestep_spacing='trailing'
)

- unet_path = './pretrained/diffusion_pytorch_model.bin'
+ unet_path='./pretrained/diffusion_pytorch_model.bin'

+ print('Loading custom white-background unet ...')
if os.path.exists(unet_path):
    unet_ckpt_path = unet_path
else:
@@ -125,140 +125,329 @@ state_dict = torch.load(unet_ckpt_path, map_location='cpu')
pipe_image_plus.unet.load_state_dict(state_dict, strict=True)
pipe_image_plus = pipe_image_plus.to(device)

- # Load rembg
+ # load rembg
bg_remover = rembg.new_session()

+
@spaces.GPU
def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0, input_num_steps=30, input_seed=42, mv_moedl_option=None):
+     # seed
    kiui.seed_everything(input_seed)
-     os.makedirs(os.path.join(opt.workspace, "gradio"), exist_ok=True)

+     os.makedirs(os.path.join(opt.workspace, "gradio"), exist_ok=True)
+
+     # text-conditioned
    if condition_input_image is None:
        mv_image_uint8 = pipe_text(prompt, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=7.5, elevation=input_elevation)
        mv_image_uint8 = (mv_image_uint8 * 255).astype(np.uint8)
-
+         # bg removal
        mv_image = []
        for i in range(4):
-             image = rembg.remove(mv_image_uint8[i], session=bg_remover)
+             image = rembg.remove(mv_image_uint8[i], session=bg_remover) # [H, W, 4]
+             # to white bg
            image = image.astype(np.float32) / 255
            image = recenter(image, image[..., 0] > 0, border_ratio=0.2)
            image = image[..., :3] * image[..., -1:] + (1 - image[..., -1:])
            mv_image.append(image)
-
-         mv_image_grid = np.concatenate([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=1)
+
+         mv_image_grid = np.concatenate([mv_image[1], mv_image[2],mv_image[3], mv_image[0]],axis=1)
        input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
-         processed_image = None
-
+
+         processed_image=None
+     # image-conditioned (may also input text, but no text usually works too)
    else:
-         condition_input_image = np.array(condition_input_image)
-         carved_image = rembg.remove(condition_input_image, session=bg_remover)
+         condition_input_image = np.array(condition_input_image) # uint8
+         # bg removal
+         carved_image = rembg.remove(condition_input_image, session=bg_remover) # [H, W, 4]
        mask = carved_image[..., -1] > 0
        image = recenter(carved_image, mask, border_ratio=0.2)
        image = image.astype(np.float32) / 255.0
        processed_image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
-
-         if mv_moedl_option == 'mvdream':
-             mv_image = pipe_image(prompt, processed_image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
-             mv_image_grid = np.concatenate([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=1)
+
+         if mv_moedl_option=='mvdream':
+             mv_image = pipe_image(prompt, processed_image, negative_prompt=prompt_neg, num_inference_steps=input_num_steps, guidance_scale=5.0, elevation=input_elevation)
+
+             mv_image_grid = np.concatenate([mv_image[1], mv_image[2],mv_image[3], mv_image[0]],axis=1)
            input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)

        else:
            from PIL import Image
-             from einops import rearrange
-
+             from einops import rearrange, repeat
+
+             # input_image=input_image* 255
            processed_image = Image.fromarray((processed_image * 255).astype(np.uint8))
            mv_image = pipe_image_plus(processed_image, num_inference_steps=input_num_steps).images[0]
            mv_image = np.asarray(mv_image, dtype=np.float32) / 255.0
-             mv_image = torch.from_numpy(mv_image).permute(2, 0, 1).contiguous().float()
+             mv_image = torch.from_numpy(mv_image).permute(2, 0, 1).contiguous().float() # (3, 960, 640)
            mv_image_grid = rearrange(mv_image, 'c (n h) (m w) -> (m h) (n w) c', n=3, m=2).numpy()
            mv_image = rearrange(mv_image, 'c (n h) (m w) -> (n m) h w c', n=3, m=2).numpy()
            input_image = mv_image

-     return mv_image_grid, processed_image, input_image
+     return mv_image_grid, processed_image, input_image

@spaces.GPU
def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_seed=42):
    kiui.seed_everything(input_seed)

-     output_obj_rgb_path = os.path.join(opt.workspace, "gradio", GRADIO_OBJ_PATH)
-     output_obj_albedo_path = os.path.join(opt.workspace, "gradio", GRADIO_OBJ_ALBEDO_PATH)
-     output_obj_shading_path = os.path.join(opt.workspace, "gradio", GRADIO_OBJ_SHADING_PATH)
-     output_video_path = os.path.join(opt.workspace, "gradio", GRADIO_VIDEO_PATH)
+
+     output_obj_rgb_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_PATH)
+     output_obj_albedo_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_ALBEDO_PATH)
+     output_obj_shading_path = os.path.join(opt.workspace,"gradio", GRADIO_OBJ_SHADING_PATH)
+
+     output_video_path = os.path.join(opt.workspace,"gradio", GRADIO_VIDEO_PATH)

-     input_image = torch.from_numpy(input_image).permute(0, 3, 1, 2).float().to(device)
-     input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
+     # generate gaussians
+     # [4, 256, 256, 3], float32
+     input_image = torch.from_numpy(input_image).permute(0, 3, 1, 2).float().to(device) # [4, 3, 256, 256]
+     input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)

-     input_rays_o, input_rays_d = get_rays(opt, proj_matrix, device, 'center')
-
-     with torch.no_grad():
-         preds = model(
-             cond_img=input_image,
-             rays=(input_rays_o, input_rays_d)
-         )
-
-         pred_rgb = preds[0].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-         pred_albedo = preds[1].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-         pred_shading = preds[2].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-
-     save_obj(output_obj_rgb_path, pred_rgb)
-     save_obj_with_mtl(output_obj_albedo_path, pred_albedo, mode="albedo")
-     save_obj_with_mtl(output_obj_shading_path, pred_shading, mode="shading")
-
-     camera_positions = orbit_camera(type="spherical", radius=2.5, h=3, w=2)
-     output_frames = []
-     for pose in tqdm.tqdm(camera_positions, ncols=0):
-         with torch.no_grad():
-             preds = model(cond_img=input_image, rays=get_rays(opt, proj_matrix, device, pose))
-             pred_rgb = preds[0].permute(0, 2, 3, 1).contiguous().cpu().numpy()
-             output_frames.append(pred_rgb)
-     output_frames = np.stack(output_frames, axis=0)
-
-     imageio.mimwrite(output_video_path, output_frames, fps=24, quality=8)
-     return output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path, output_video_path
-
- def update_mv_model(mv_moedl_option):
-     if mv_moedl_option == 'mvdream':
-         return gr.update(visible=False)
-     else:
-         return gr.update(visible=True)
+     images_input_vit = F.interpolate(input_image, size=(224, 224), mode='bilinear', align_corners=False)
+
+     data = {}
+     input_image = input_image.unsqueeze(0) # [1, 4, 9, H, W]
+     images_input_vit=images_input_vit.unsqueeze(0)
+     data['input_vit']=images_input_vit
+
+     elevation = 0
+     cam_poses =[]
+     if mv_moedl_option=='mvdream' or condition_input_image is None:
+         azimuth = np.arange(0, 360, 90, dtype=np.int32)
+         for azi in tqdm.tqdm(azimuth):
+             cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
+             cam_poses.append(cam_pose)
+     else:
+         azimuth = np.arange(30, 360, 60, dtype=np.int32)
+         cnt = 0
+         for azi in tqdm.tqdm(azimuth):
+             if (cnt+1) % 2!= 0:
+                 elevation=-20
+             else:
+                 elevation=30
+             cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True)).unsqueeze(0).to(device)
+             cam_poses.append(cam_pose)
+             cnt=cnt+1
+
+     cam_poses = torch.cat(cam_poses,0)
+     radius = torch.norm(cam_poses[0, :3, 3])
+     cam_poses[:, :3, 3] *= opt.cam_radius / radius
+     transform = torch.tensor([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, opt.cam_radius], [0, 0, 0, 1]], dtype=torch.float32).to(device) @ torch.inverse(cam_poses[0])
+     cam_poses = transform.unsqueeze(0) @ cam_poses
+
+     cam_poses=cam_poses.unsqueeze(0)
+     data['source_camera']=cam_poses
+
+     with torch.no_grad():
+         if opt.volume_mode == 'TRF_Mesh':
+             with torch.autocast(device_type='cuda', dtype=torch.float32):
+                 svd_volume = model.forward_svd_volume(input_image,data)
+         else:
+             with torch.autocast(device_type='cuda', dtype=torch.float16):
+                 svd_volume = model.forward_svd_volume(input_image,data)
+
+     #time-consuming
+     export_texmap=False
+
+     mesh_out = model.extract_mesh(svd_volume,use_texture_map=export_texmap)
+
+     if export_texmap:
+         vertices, faces, uvs, mesh_tex_idx, tex_map = mesh_out
+
+         for i in range(len(tex_map)):
+             mesh_path=os.path.join(opt.workspace, name + str(i) + '_'+ str(seed)+ '.obj')
+             save_obj_with_mtl(
+                 vertices.data.cpu().numpy(),
+                 uvs.data.cpu().numpy(),
+                 faces.data.cpu().numpy(),
+                 mesh_tex_idx.data.cpu().numpy(),
+                 tex_map[i].permute(1, 2, 0).data.cpu().numpy(),
+                 mesh_path,
+             )
+     else:
+         vertices, faces, vertex_colors = mesh_out

+         save_obj(vertices, faces, vertex_colors[0], output_obj_rgb_path)
+         save_obj(vertices, faces, vertex_colors[1], output_obj_albedo_path)
+         save_obj(vertices, faces, vertex_colors[2], output_obj_shading_path)
+
+     # images=[]
+     # azimuth = np.arange(0, 360, 6, dtype=np.int32)
+     # for azi in tqdm.tqdm(azimuth):
+
+     #     cam_pose = torch.from_numpy(orbit_camera(elevation, azi, radius=opt.cam_radius, opengl=True))
+
+     #     if opt.volume_mode == 'TRF_Mesh':
+     #         cam_view = torch.inverse(cam_pose)
+     #         cam_view=cam_view.unsqueeze(0).unsqueeze(0).to(device)
+     #         data['w2c'] = cam_view
+     #         with torch.autocast(device_type='cuda', dtype=torch.float32):
+     #             render_images=model.render_frame(data)
+     #     else:
+     #         rays_o, rays_d = get_rays(cam_pose, opt.infer_render_size, opt.infer_render_size, opt.fovy) # [h, w, 3]
+     #         rays_o=rays_o.unsqueeze(0).unsqueeze(0).to(device)# B,V,H,W,3
+     #         rays_d=rays_d.unsqueeze(0).unsqueeze(0).to(device)
+     #         data['all_rays_o']=rays_o
+     #         data['all_rays_d']=rays_d
+     #         with torch.autocast(device_type='cuda', dtype=torch.float16):
+     #             render_images=model.render_frame(data)
+     #     image=render_images['images_pred']
+
+     #     images.append((image.squeeze(1).permute(0,2,3,1).contiguous().float().cpu().numpy() * 255).astype(np.uint8))
+
+     # images = np.concatenate(images, axis=0)
+     # imageio.mimwrite(output_video_path, images, fps=30)
+
+
+     return output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path #, output_video_path
+

- # Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown(
-         "## Generate 3D object from text or image prompt"
-     )
+
+ # gradio UI
+
+ _TITLE = '''LDM: Large Tensorial SDF Model for Textured Mesh Generation'''
+
+ _DESCRIPTION = '''
+
+
+
+ * Input can be text prompt, image.
+ * The currently supported multi-view diffusion models include the image-conditioned MVdream and Zero123plus, as well as the text-conditioned Imagedream.
+ * If you find the output unsatisfying, try using different multi-view diffusion models or seeds!
+ '''
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ block = gr.Blocks(title=_TITLE).queue()
+ with block:

    with gr.Row():
-         with gr.Column():
-             input_prompt = gr.Textbox(label="Prompt", lines=3)
-             input_image = gr.Image(label="Input Image", type='numpy', optional=True)
-             input_seed = gr.Slider(minimum=0, maximum=65535, step=1, label="Random Seed", value=42)
-             input_elevation = gr.Slider(minimum=-10, maximum=10, step=1, label="Elevation", value=0)
-             input_num_steps = gr.Slider(minimum=1, maximum=150, step=1, label="Number of Inference Steps", value=30)
-             mv_moedl_option = gr.Radio(
-                 ["mvdream", "zero123plus"],
-                 label="Model Option",
-                 value="mvdream",
-                 interactive=True
-             )
-             generate_mv_button = gr.Button(value="Generate Multi-View Images")
-             generate_mv_button.click(fn=generate_mv,
-                 inputs=[input_image, input_prompt, '', input_elevation, input_num_steps, input_seed, mv_moedl_option],
-                 outputs=['multi_view_output', 'processed_image_output', 'input_image_output'])
-
-         with gr.Column():
-             gr.Markdown("### Multi-View Images")
-             multi_view_output = gr.Image()
-             processed_image_output = gr.Image()
-             gr.Markdown("### Input Image (Processed)")
-             input_image_output = gr.Image()
-             generate_3d_button = gr.Button(value="Generate 3D Model")
-             generate_3d_button.click(fn=generate_3d,
-                 inputs=[input_image_output, processed_image_output, mv_moedl_option, input_seed],
-                 outputs=['output_obj_rgb', 'output_obj_albedo', 'output_obj_shading', 'output_video'])
-
-     output_obj_rgb = gr.File(label="RGB 3D Model (.obj)")
-     output_obj_albedo = gr.File(label="Albedo 3D Model (.obj)")
-     output_obj_shading = gr.File(label="Shading 3D Model (.obj)")
-     output_video = gr.Video(label="360° View of the Generated 3D Model (.mp4)")
-
- demo.launch()
+         with gr.Column(scale=1):
+             gr.Markdown('# ' + _TITLE)
+             gr.Markdown(_DESCRIPTION)
+
+     with gr.Row(variant='panel'):
+         with gr.Column(scale=1):
+             with gr.Tab("Image-to-3D"):
+                 # input image
+                 with gr.Row():
+                     condition_input_image = gr.Image(
+                         label="Input Image",
+                         image_mode="RGBA",
+                         type="pil"
+                     )
+
+                     processed_image = gr.Image(
+                         label="Processed Image",
+                         image_mode="RGBA",
+                         type="pil",
+                         interactive=False
+                     )
+
+                 with gr.Row():
+                     mv_moedl_option = gr.Radio([
+                         "zero123plus",
+                         "mvdream"
+                     ], value="zero123plus",
+                     label="Multi-view Diffusion")
+
+                 with gr.Row(variant="panel"):
+                     gr.Examples(
+                         examples=[
+                             os.path.join("example", img_name) for img_name in sorted(os.listdir("example"))
+                         ],
+                         inputs=[condition_input_image],
+                         fn=lambda x: process(condition_input_image=x, prompt=''),
+                         cache_examples=False,
+                         examples_per_page=20,
+                         label='Image-to-3D Examples'
+                     )
+
+             with gr.Tab("Text-to-3D"):
+                 # input prompt
+                 with gr.Row():
+                     input_text = gr.Textbox(label="prompt")
+                 # negative prompt
+                 with gr.Row():
+                     input_neg_text = gr.Textbox(label="negative prompt", value='ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate')
+
+                 with gr.Row(variant="panel"):
+                     gr.Examples(
+                         examples=[
+                             "a hamburger",
+                             "a furry red fox head",
+                             "a teddy bear",
+                             "a motorbike",
+                         ],
+                         inputs=[input_text],
+                         fn=lambda x: process(condition_input_image=None, prompt=x),
+                         cache_examples=False,
+                         label='Text-to-3D Examples'
+                     )
+
+             # elevation
+             input_elevation = gr.Slider(label="elevation", minimum=-90, maximum=90, step=1, value=0)
+             # inference steps
+             input_num_steps = gr.Slider(label="inference steps", minimum=1, maximum=100, step=1, value=30)
+             # random seed
+             input_seed = gr.Slider(label="random seed", minimum=0, maximum=100000, step=1, value=0)
+             # gen button
+             button_gen = gr.Button("Generate")
+
+         with gr.Column(scale=1):
+             with gr.Row():
+                 # multi-view results
+                 mv_image_grid = gr.Image(interactive=False, show_label=False)
+             # with gr.Row():
+             #     output_video_path = gr.Video(label="video")
+             with gr.Row():
+                 output_obj_rgb_path = gr.Model3D(
+                     label="RGB Model (OBJ Format)",
+                     interactive=False,
+                 )
+             with gr.Row():
+                 output_obj_albedo_path = gr.Model3D(
+                     label="Albedo Model (OBJ Format)",
+                     interactive=False,
+                 )
+             with gr.Row():
+                 output_obj_shading_path = gr.Model3D(
+                     label="Shading Model (OBJ Format)",
+                     interactive=False,
+                 )
+
+     input_image = gr.State()
+     button_gen.click(fn=generate_mv, inputs=[condition_input_image, input_text, input_neg_text, input_elevation, input_num_steps, input_seed, mv_moedl_option],
+         outputs=[mv_image_grid, processed_image, input_image],).success(
+         fn=generate_3d,
+         inputs=[input_image, condition_input_image, mv_moedl_option, input_seed],
+         outputs=[output_obj_rgb_path, output_obj_albedo_path, output_obj_shading_path] , #output_video_path
+     )
+
+ block.launch(server_name="0.0.0.0", share=False)
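
The most substantive new logic in this revision is the source-camera setup inside generate_3d: the multi-view images are assigned orbit poses (four views 90° apart at zero elevation for the mvdream/text path, six views 60° apart alternating between -20° and 30° elevation for zero123plus), the orbit radius is normalized to opt.cam_radius, and every pose is re-expressed relative to the first camera, which is mapped onto a canonical pose at distance cam_radius along +z before being handed to the model as data['source_camera']. Below is a minimal, self-contained sketch of that normalization step, assuming only that kiui is installed and using a placeholder cam_radius = 1.5 in place of opt.cam_radius.

import numpy as np
import torch
from kiui.cam import orbit_camera  # same helper app.py uses

cam_radius = 1.5  # placeholder; app.py reads this from opt.cam_radius

# four source views, 90 degrees apart at zero elevation (the mvdream branch)
poses = [
    torch.from_numpy(orbit_camera(0, azi, radius=cam_radius, opengl=True)).unsqueeze(0)
    for azi in np.arange(0, 360, 90, dtype=np.int32)
]
cam_poses = torch.cat(poses, 0).float()  # [4, 4, 4] camera-to-world matrices

# normalize the orbit radius, then re-anchor everything to the first view:
# after this, camera 0 sits exactly at [0, 0, cam_radius] and the remaining
# poses are expressed relative to it
radius = torch.norm(cam_poses[0, :3, 3])
cam_poses[:, :3, 3] *= cam_radius / radius
canonical = torch.tensor([[1, 0, 0, 0],
                          [0, 1, 0, 0],
                          [0, 0, 1, cam_radius],
                          [0, 0, 0, 1]], dtype=torch.float32)
cam_poses = (canonical @ torch.inverse(cam_poses[0])).unsqueeze(0) @ cam_poses
print(cam_poses[0])  # equals `canonical` up to floating-point error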