xinjie.wang committed on
Commit
10c708b
·
1 Parent(s): 41600f7
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import os
2
- os.environ["GRADIO_APP"] = "imageto3d"
3
-
4
- from functools import partial
5
 
 
6
  import gradio as gr
7
  from common import (
8
  MAX_SEED,
9
  VERSION,
10
  active_btn_by_content,
 
11
  extract_3d_representations_v2,
12
  extract_urdf,
13
  get_seed,
@@ -15,15 +14,11 @@ from common import (
15
  preprocess_image_fn,
16
  preprocess_sam_image_fn,
17
  select_point,
18
- start_session,
19
- end_session,
20
  )
21
  from gradio.themes import Default
22
  from gradio.themes.utils.colors import slate
23
 
24
-
25
-
26
-
27
  with gr.Blocks(
28
  delete_cache=(43200, 43200), theme=Default(primary_hue=slate)
29
  ) as demo:
@@ -231,7 +226,7 @@ with gr.Blocks(
231
  label="Mesh Representation",
232
  height=300,
233
  interactive=False,
234
- clear_color=[0.9, 0.9, 0.9, 1.0],
235
  )
236
  gr.Markdown(
237
  """ The rendering of `Gaussian Representation` takes additional 10s. """ # noqa
@@ -432,4 +427,4 @@ with gr.Blocks(
432
 
433
 
434
  if __name__ == "__main__":
435
- demo.launch()
 
1
  import os
 
 
 
2
 
3
+ os.environ["GRADIO_APP"] = "imageto3d"
4
  import gradio as gr
5
  from common import (
6
  MAX_SEED,
7
  VERSION,
8
  active_btn_by_content,
9
+ end_session,
10
  extract_3d_representations_v2,
11
  extract_urdf,
12
  get_seed,
 
14
  preprocess_image_fn,
15
  preprocess_sam_image_fn,
16
  select_point,
17
+ start_session,
 
18
  )
19
  from gradio.themes import Default
20
  from gradio.themes.utils.colors import slate
21
 
 
 
 
22
  with gr.Blocks(
23
  delete_cache=(43200, 43200), theme=Default(primary_hue=slate)
24
  ) as demo:
 
226
  label="Mesh Representation",
227
  height=300,
228
  interactive=False,
229
+ clear_color=[1, 1, 1, 1],
230
  )
231
  gr.Markdown(
232
  """ The rendering of `Gaussian Representation` takes additional 10s. """ # noqa
 
427
 
428
 
429
  if __name__ == "__main__":
430
+ demo.launch(server_name="10.34.8.82", server_port=8084)
asset3d_gen/models/sr_model.py CHANGED
@@ -58,16 +58,23 @@ class ImageStableSR:
58
 
59
  class ImageRealESRGAN:
60
  def __init__(self, outscale: int, model_path: str = None) -> None:
61
- # monkey_patch_basicsr.py
62
- import sys
63
- import types
64
  import torchvision
65
  from packaging import version
66
- if version.parse(torchvision.__version__) >= version.parse("0.16"):
 
 
 
 
67
  import torchvision.transforms.functional as TF
68
- functional_tensor = types.ModuleType("torchvision.transforms.functional_tensor")
 
 
 
69
  functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
70
- sys.modules["torchvision.transforms.functional_tensor"] = functional_tensor
 
 
71
 
72
  from basicsr.archs.rrdbnet_arch import RRDBNet
73
  from realesrgan import RealESRGANer
 
58
 
59
  class ImageRealESRGAN:
60
  def __init__(self, outscale: int, model_path: str = None) -> None:
61
+ # monkey_patch
 
 
62
  import torchvision
63
  from packaging import version
64
+
65
+ if version.parse(torchvision.__version__) > version.parse("0.16"):
66
+ import sys
67
+ import types
68
+
69
  import torchvision.transforms.functional as TF
70
+
71
+ functional_tensor = types.ModuleType(
72
+ "torchvision.transforms.functional_tensor"
73
+ )
74
  functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
75
+ sys.modules["torchvision.transforms.functional_tensor"] = (
76
+ functional_tensor
77
+ )
78
 
79
  from basicsr.archs.rrdbnet_arch import RRDBNet
80
  from realesrgan import RealESRGANer
asset3d_gen/utils/gpt_clients.py CHANGED
@@ -154,7 +154,8 @@ if endpoint and api_key and api_version:
154
  else:
155
  GPT_CLIENT = GPTclient(
156
  endpoint="https://openrouter.ai/api/v1",
157
- api_key="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", # noqa
 
158
  model_name="qwen/qwen2.5-vl-72b-instruct:free",
159
  )
160
 
 
154
  else:
155
  GPT_CLIENT = GPTclient(
156
  endpoint="https://openrouter.ai/api/v1",
157
+ # api_key="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", # noqa
158
+ api_key="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",
159
  model_name="qwen/qwen2.5-vl-72b-instruct:free",
160
  )
161
 
asset3d_gen/validators/urdf_convertor.py CHANGED
@@ -406,7 +406,7 @@ class URDFGenerator(object):
406
  if __name__ == "__main__":
407
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
408
  urdf_path = urdf_gen(
409
- mesh_path="scripts/apps/assets/example_texture/meshes/robot.obj",
410
  output_root="outputs/test_urdf",
411
  # category="coffee machine",
412
  # min_height=1.0,
 
406
  if __name__ == "__main__":
407
  urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
408
  urdf_path = urdf_gen(
409
+ mesh_path="outputs/imageto3d/cma/o5/URDF_o5/mesh/o5.obj",
410
  output_root="outputs/test_urdf",
411
  # category="coffee machine",
412
  # min_height=1.0,
common.py CHANGED
@@ -1,24 +1,18 @@
1
  import gc
2
  import logging
3
  import os
 
 
4
  import sys
5
  from glob import glob
6
- from typing import Union
7
- import shutil
8
  import cv2
9
- import subprocess
10
  import gradio as gr
11
  import numpy as np
12
  import spaces
13
  import torch
14
  import trimesh
15
  from easydict import EasyDict as edict
16
- from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import (
17
- StableDiffusionXLPipeline,
18
- )
19
- from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter import ( # noqa
20
- StableDiffusionXLPipeline as StableDiffusionXLPipelineIP,
21
- )
22
  from PIL import Image
23
  from tqdm import tqdm
24
  from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
@@ -29,9 +23,15 @@ from asset3d_gen.models.segment_model import (
29
  SAMPredictor,
30
  trellis_preprocess,
31
  )
32
- from asset3d_gen.models.sr_model import ImageRealESRGAN, ImageStableSR
33
  from asset3d_gen.scripts.render_gs import entrypoint as render_gs_api
34
- from asset3d_gen.scripts.text2image import text2img_gen
 
 
 
 
 
 
35
  from asset3d_gen.utils.process_media import (
36
  filter_image_small_connected_components,
37
  merge_images_video,
@@ -45,12 +45,6 @@ from asset3d_gen.validators.quality_checkers import (
45
  MeshGeoChecker,
46
  )
47
  from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
48
- from asset3d_gen.utils.gpt_clients import GPT_CLIENT
49
- from asset3d_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
50
- from asset3d_gen.scripts.text2image import (
51
- build_text2img_ip_pipeline,
52
- build_text2img_pipeline,
53
- )
54
 
55
  current_file_path = os.path.abspath(__file__)
56
  current_dir = os.path.dirname(current_file_path)
@@ -73,11 +67,16 @@ logging.basicConfig(
73
  logger = logging.getLogger(__name__)
74
 
75
 
 
 
 
76
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
 
 
77
  MAX_SEED = 100000
78
- IMAGE_BUFFER = {}
79
  DELIGHT = DelightingModel()
80
- IMAGESR_MODEL = None # ImageRealESRGAN(outscale=4)
 
81
 
82
  if os.getenv("GRADIO_APP") == "imageto3d":
83
  RBG_REMOVER = RembgRemover()
@@ -99,7 +98,9 @@ elif os.getenv("GRADIO_APP") == "textto3d":
99
  "JeffreyXiang/TRELLIS-image-large"
100
  )
101
  # PIPELINE.cuda()
102
- PIPELINE_IMG_IP = build_text2img_ip_pipeline("weights/Kolors", ref_scale=0.3)
 
 
103
  PIPELINE_IMG = build_text2img_pipeline("weights/Kolors")
104
  SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
105
  GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
@@ -160,6 +161,7 @@ def render_mesh(sample, extrinsics, intrinsics, options={}, **kwargs):
160
  return rets
161
 
162
 
 
163
  def render_video(
164
  sample,
165
  resolution=512,
@@ -178,8 +180,6 @@ def render_video(
178
  render_fn = (
179
  render_mesh if isinstance(sample, MeshExtractResult) else render_frames
180
  )
181
- print(torch.cuda.memory_allocated() / 1024**2, "MB 已分配")
182
- print(torch.cuda.memory_reserved() / 1024**2, "MB 已预留")
183
  result = render_fn(
184
  sample,
185
  extrinsics,
@@ -187,21 +187,21 @@ def render_video(
187
  {"resolution": resolution, "bg_color": bg_color},
188
  **kwargs,
189
  )
190
-
191
  return result
192
 
193
 
194
  @spaces.GPU
195
  def preprocess_image_fn(
196
  image: str | np.ndarray | Image.Image,
 
197
  ) -> Image.Image:
198
  if isinstance(image, str):
199
  image = Image.open(image)
200
  elif isinstance(image, np.ndarray):
201
  image = Image.fromarray(image)
202
 
203
- IMAGE_BUFFER["raw_image"] = image
204
-
205
  image = RBG_REMOVER(image)
206
  image = trellis_preprocess(image)
207
 
@@ -209,11 +209,13 @@ def preprocess_image_fn(
209
 
210
 
211
  @spaces.GPU
212
- def preprocess_sam_image_fn(image: Image.Image) -> Image.Image:
 
 
213
  if isinstance(image, np.ndarray):
214
  image = Image.fromarray(image)
215
 
216
- IMAGE_BUFFER["raw_image"] = image
217
  sam_image = SAM_PREDICTOR.preprocess_image(image)
218
  SAM_PREDICTOR.predictor.set_image(sam_image)
219
 
@@ -352,8 +354,8 @@ def image_to_3d(
352
 
353
  if isinstance(seg_image, np.ndarray):
354
  seg_image = Image.fromarray(seg_image)
355
- IMAGE_BUFFER["seg_image"] = seg_image
356
 
 
357
  PIPELINE.cuda()
358
  outputs = PIPELINE.run(
359
  seg_image,
@@ -370,13 +372,12 @@ def image_to_3d(
370
  },
371
  )
372
  # Set to cpu for memory saving.
373
- # PIPELINE.cpu()
374
 
375
  gs_model = outputs["gaussian"][0]
376
  mesh_model = outputs["mesh"][0]
377
- with torch.no_grad():
378
- color_images = render_video(gs_model, num_frames=1)["color"]
379
- normal_images = render_video(mesh_model, num_frames=1)["normal"]
380
  output_root = TMP_DIR
381
  if req is not None:
382
  output_root = os.path.join(output_root, str(req.session_hash))
@@ -567,8 +568,10 @@ def extract_urdf(
567
  for checker in CHECKERS:
568
  images = image_paths
569
  if isinstance(checker, ImageSegChecker):
570
- print("IMAGE_BUFFER", IMAGE_BUFFER.keys())
571
- images = [IMAGE_BUFFER["raw_image"], IMAGE_BUFFER["seg_image"]]
 
 
572
  images_list.append(images)
573
 
574
  results = BaseChecker.validate(CHECKERS, images_list)
@@ -634,7 +637,7 @@ def text2image_fn(
634
  if postprocess:
635
  for idx in range(len(images)):
636
  image = images[idx]
637
- images[idx] = preprocess_image_fn(image, RBG_REMOVER)
638
 
639
  save_paths = []
640
  for idx, image in enumerate(images):
 
1
  import gc
2
  import logging
3
  import os
4
+ import shutil
5
+ import subprocess
6
  import sys
7
  from glob import glob
8
+
 
9
  import cv2
 
10
  import gradio as gr
11
  import numpy as np
12
  import spaces
13
  import torch
14
  import trimesh
15
  from easydict import EasyDict as edict
 
 
 
 
 
 
16
  from PIL import Image
17
  from tqdm import tqdm
18
  from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
 
23
  SAMPredictor,
24
  trellis_preprocess,
25
  )
26
+ from asset3d_gen.models.sr_model import ImageRealESRGAN
27
  from asset3d_gen.scripts.render_gs import entrypoint as render_gs_api
28
+ from asset3d_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
29
+ from asset3d_gen.scripts.text2image import (
30
+ build_text2img_ip_pipeline,
31
+ build_text2img_pipeline,
32
+ text2img_gen,
33
+ )
34
+ from asset3d_gen.utils.gpt_clients import GPT_CLIENT
35
  from asset3d_gen.utils.process_media import (
36
  filter_image_small_connected_components,
37
  merge_images_video,
 
45
  MeshGeoChecker,
46
  )
47
  from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
 
 
 
 
 
 
48
 
49
  current_file_path = os.path.abspath(__file__)
50
  current_dir = os.path.dirname(current_file_path)
 
67
  logger = logging.getLogger(__name__)
68
 
69
 
70
+ os.environ["TORCH_EXTENSIONS_DIR"] = os.path.expanduser(
71
+ "~/.cache/torch_extensions"
72
+ )
73
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
74
+ os.environ['SPCONV_ALGO'] = 'native'
75
+
76
  MAX_SEED = 100000
 
77
  DELIGHT = DelightingModel()
78
+ IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
79
+
80
 
81
  if os.getenv("GRADIO_APP") == "imageto3d":
82
  RBG_REMOVER = RembgRemover()
 
98
  "JeffreyXiang/TRELLIS-image-large"
99
  )
100
  # PIPELINE.cuda()
101
+ PIPELINE_IMG_IP = build_text2img_ip_pipeline(
102
+ "weights/Kolors", ref_scale=0.3
103
+ )
104
  PIPELINE_IMG = build_text2img_pipeline("weights/Kolors")
105
  SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
106
  GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
 
161
  return rets
162
 
163
 
164
+ @spaces.GPU
165
  def render_video(
166
  sample,
167
  resolution=512,
 
180
  render_fn = (
181
  render_mesh if isinstance(sample, MeshExtractResult) else render_frames
182
  )
 
 
183
  result = render_fn(
184
  sample,
185
  extrinsics,
 
187
  {"resolution": resolution, "bg_color": bg_color},
188
  **kwargs,
189
  )
190
+
191
  return result
192
 
193
 
194
  @spaces.GPU
195
  def preprocess_image_fn(
196
  image: str | np.ndarray | Image.Image,
197
+ req: gr.Request,
198
  ) -> Image.Image:
199
  if isinstance(image, str):
200
  image = Image.open(image)
201
  elif isinstance(image, np.ndarray):
202
  image = Image.fromarray(image)
203
 
204
+ image.save(f"{TMP_DIR}/{req.session_hash}/raw_image.png")
 
205
  image = RBG_REMOVER(image)
206
  image = trellis_preprocess(image)
207
 
 
209
 
210
 
211
  @spaces.GPU
212
+ def preprocess_sam_image_fn(
213
+ image: Image.Image, req: gr.Request
214
+ ) -> Image.Image:
215
  if isinstance(image, np.ndarray):
216
  image = Image.fromarray(image)
217
 
218
+ image.save(f"{TMP_DIR}/{req.session_hash}/raw_image.png")
219
  sam_image = SAM_PREDICTOR.preprocess_image(image)
220
  SAM_PREDICTOR.predictor.set_image(sam_image)
221
 
 
354
 
355
  if isinstance(seg_image, np.ndarray):
356
  seg_image = Image.fromarray(seg_image)
 
357
 
358
+ seg_image.save(f"{TMP_DIR}/{req.session_hash}/seg_image.png")
359
  PIPELINE.cuda()
360
  outputs = PIPELINE.run(
361
  seg_image,
 
372
  },
373
  )
374
  # Set to cpu for memory saving.
375
+ PIPELINE.cpu()
376
 
377
  gs_model = outputs["gaussian"][0]
378
  mesh_model = outputs["mesh"][0]
379
+ color_images = render_video(gs_model)["color"]
380
+ normal_images = render_video(mesh_model)["normal"]
 
381
  output_root = TMP_DIR
382
  if req is not None:
383
  output_root = os.path.join(output_root, str(req.session_hash))
 
568
  for checker in CHECKERS:
569
  images = image_paths
570
  if isinstance(checker, ImageSegChecker):
571
+ images = [
572
+ f"{TMP_DIR}/{req.session_hash}/raw_image.png",
573
+ f"{TMP_DIR}/{req.session_hash}/seg_image.png",
574
+ ]
575
  images_list.append(images)
576
 
577
  results = BaseChecker.validate(CHECKERS, images_list)
 
637
  if postprocess:
638
  for idx in range(len(images)):
639
  image = images[idx]
640
+ images[idx] = preprocess_image_fn(image)
641
 
642
  save_paths = []
643
  for idx, image in enumerate(images):
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
- # --extra-index-url https://download.pytorch.org/whl/cu118
2
  --extra-index-url https://download.pytorch.org/whl/cu121
3
 
4
 
5
- # torch==2.1.0
6
- # torchaudio==2.1.0
7
- # torchvision==0.16.0
8
- # xformers==0.0.22.post7
 
9
  dataclasses_json
10
  easydict
11
  opencv-python>4.5
@@ -18,7 +18,6 @@ pymeshfix==0.17.0
18
  igraph==0.11.8
19
  pyvista==0.36.1
20
  openai==1.58.1
21
- # spconv-cu118==2.3.8
22
  transformers==4.42.4
23
  # gradio_litmodel3d==0.0.1
24
  gradio==5.12.0
@@ -27,7 +26,6 @@ diffusers==0.31.0
27
  xatlas==0.0.9
28
  onnxruntime==1.20.1
29
  tenacity==8.2.2
30
- # pytorch-lightning==2.1.0
31
  accelerate==0.33.0
32
  basicsr==1.4.2
33
  realesrgan==0.3.0
@@ -38,22 +36,8 @@ utils3d@git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c
38
  clip@git+https://github.com/openai/CLIP.git
39
  kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
40
  segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f
41
- # https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl
42
- # https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
43
- # https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl
44
- # https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt21cu118-cp310-cp310-linux_x86_64.whl
45
- # https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
46
-
47
-
48
- torch==2.4.0
49
-
50
- torchvision==0.19.0
51
- pytorch-lightning==2.4.0
52
- spconv-cu120==2.3.6
53
- xformers==0.0.27.post2
54
- kaolin@git+https://github.com/NVIDIAGameWorks/[email protected]
55
- https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
56
-
57
- https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
58
- https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
59
- https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
 
1
  --extra-index-url https://download.pytorch.org/whl/cu121
2
 
3
 
4
+ torch==2.4.0
5
+ torchvision==0.19.0
6
+ xformers==0.0.27.post2
7
+ pytorch-lightning==2.4.0
8
+ spconv-cu120==2.3.6
9
  dataclasses_json
10
  easydict
11
  opencv-python>4.5
 
18
  igraph==0.11.8
19
  pyvista==0.36.1
20
  openai==1.58.1
 
21
  transformers==4.42.4
22
  # gradio_litmodel3d==0.0.1
23
  gradio==5.12.0
 
26
  xatlas==0.0.9
27
  onnxruntime==1.20.1
28
  tenacity==8.2.2
 
29
  accelerate==0.33.0
30
  basicsr==1.4.2
31
  realesrgan==0.3.0
 
36
  clip@git+https://github.com/openai/CLIP.git
37
  kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
38
  segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f
39
+ https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/gsplat-1.5.0%2Bpt24cu121-cp310-cp310-linux_x86_64.whl
40
+ https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl
41
+ https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl
42
+ https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/flash_attn-2.7.0.post2%2Bcu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
43
+ https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl