Spaces:

xinjjj
/

ImgRoboAssetGen

Running on Zero

App Files Community

xinjie.wang committed on Apr 14

Commit

10c708b

1 Parent(s): 41600f7

update

Browse files

Files changed (6) hide show

app.py +5 -10
asset3d_gen/models/sr_model.py +13 -6
asset3d_gen/utils/gpt_clients.py +2 -1
asset3d_gen/validators/urdf_convertor.py +1 -1
common.py +38 -35
requirements.txt +10 -26

app.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
-os.environ["GRADIO_APP"] = "imageto3d"
-from functools import partial
 import gradio as gr
 from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_content,
     extract_3d_representations_v2,
     extract_urdf,
     get_seed,
@@ -15,15 +14,11 @@ from common import (
     preprocess_image_fn,
     preprocess_sam_image_fn,
     select_point,
-    start_session,
-    end_session,
 )
 from gradio.themes import Default
 from gradio.themes.utils.colors import slate
 with gr.Blocks(
     delete_cache=(43200, 43200), theme=Default(primary_hue=slate)
 ) as demo:
@@ -231,7 +226,7 @@ with gr.Blocks(
                     label="Mesh Representation",
                     height=300,
                     interactive=False,
-                    clear_color=[0.9, 0.9, 0.9, 1.0],
                 )
             gr.Markdown(
                 """ The rendering of `Gaussian Representation` takes additional 10s. """  # noqa
@@ -432,4 +427,4 @@ with gr.Blocks(
 if __name__ == "__main__":
-    demo.launch()

 import os
+os.environ["GRADIO_APP"] = "imageto3d"
 import gradio as gr
 from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_content,
+    end_session,
     extract_3d_representations_v2,
     extract_urdf,
     get_seed,
     preprocess_image_fn,
     preprocess_sam_image_fn,
     select_point,
+    start_session,
 )
 from gradio.themes import Default
 from gradio.themes.utils.colors import slate
 with gr.Blocks(
     delete_cache=(43200, 43200), theme=Default(primary_hue=slate)
 ) as demo:
                     label="Mesh Representation",
                     height=300,
                     interactive=False,
+                    clear_color=[1, 1, 1, 1],
                 )
             gr.Markdown(
                 """ The rendering of `Gaussian Representation` takes additional 10s. """  # noqa
 if __name__ == "__main__":
+    demo.launch(server_name="10.34.8.82", server_port=8084)

asset3d_gen/models/sr_model.py CHANGED Viewed

@@ -58,16 +58,23 @@ class ImageStableSR:
 class ImageRealESRGAN:
     def __init__(self, outscale: int, model_path: str = None) -> None:
-        # monkey_patch_basicsr.py
-        import sys
-        import types
         import torchvision
         from packaging import version
-        if version.parse(torchvision.__version__) >= version.parse("0.16"):
             import torchvision.transforms.functional as TF
-            functional_tensor = types.ModuleType("torchvision.transforms.functional_tensor")
             functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
-            sys.modules["torchvision.transforms.functional_tensor"] = functional_tensor
         from basicsr.archs.rrdbnet_arch import RRDBNet
         from realesrgan import RealESRGANer

 class ImageRealESRGAN:
     def __init__(self, outscale: int, model_path: str = None) -> None:
+        # monkey_patch
         import torchvision
         from packaging import version
+        if version.parse(torchvision.__version__) > version.parse("0.16"):
+            import sys
+            import types
             import torchvision.transforms.functional as TF
+            functional_tensor = types.ModuleType(
+                "torchvision.transforms.functional_tensor"
+            )
             functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
+            sys.modules["torchvision.transforms.functional_tensor"] = (
+                functional_tensor
+            )
         from basicsr.archs.rrdbnet_arch import RRDBNet
         from realesrgan import RealESRGANer

asset3d_gen/utils/gpt_clients.py CHANGED Viewed

@@ -154,7 +154,8 @@ if endpoint and api_key and api_version:
 else:
     GPT_CLIENT = GPTclient(
         endpoint="https://openrouter.ai/api/v1",
-        api_key="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",  # noqa
         model_name="qwen/qwen2.5-vl-72b-instruct:free",
     )

 else:
     GPT_CLIENT = GPTclient(
         endpoint="https://openrouter.ai/api/v1",
+        # api_key="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",  # noqa
+        api_key="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",
         model_name="qwen/qwen2.5-vl-72b-instruct:free",
     )

asset3d_gen/validators/urdf_convertor.py CHANGED Viewed

@@ -406,7 +406,7 @@ class URDFGenerator(object):
 if __name__ == "__main__":
     urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
     urdf_path = urdf_gen(
-        mesh_path="scripts/apps/assets/example_texture/meshes/robot.obj",
         output_root="outputs/test_urdf",
         # category="coffee machine",
         # min_height=1.0,

 if __name__ == "__main__":
     urdf_gen = URDFGenerator(GPT_CLIENT, render_view_num=4)
     urdf_path = urdf_gen(
+        mesh_path="outputs/imageto3d/cma/o5/URDF_o5/mesh/o5.obj",
         output_root="outputs/test_urdf",
         # category="coffee machine",
         # min_height=1.0,

common.py CHANGED Viewed

@@ -1,24 +1,18 @@
 import gc
 import logging
 import os
 import sys
 from glob import glob
-from typing import Union
-import shutil
 import cv2
-import subprocess
 import gradio as gr
 import numpy as np
 import spaces
 import torch
 import trimesh
 from easydict import EasyDict as edict
-from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import (
-    StableDiffusionXLPipeline,
-)
-from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256_ipadapter import (  # noqa
-    StableDiffusionXLPipeline as StableDiffusionXLPipelineIP,
-)
 from PIL import Image
 from tqdm import tqdm
 from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
@@ -29,9 +23,15 @@ from asset3d_gen.models.segment_model import (
     SAMPredictor,
     trellis_preprocess,
 )
-from asset3d_gen.models.sr_model import ImageRealESRGAN, ImageStableSR
 from asset3d_gen.scripts.render_gs import entrypoint as render_gs_api
-from asset3d_gen.scripts.text2image import text2img_gen
 from asset3d_gen.utils.process_media import (
     filter_image_small_connected_components,
     merge_images_video,
@@ -45,12 +45,6 @@ from asset3d_gen.validators.quality_checkers import (
     MeshGeoChecker,
 )
 from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
-from asset3d_gen.utils.gpt_clients import GPT_CLIENT
-from asset3d_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
-from asset3d_gen.scripts.text2image import (
-    build_text2img_ip_pipeline,
-    build_text2img_pipeline,
-)
 current_file_path = os.path.abspath(__file__)
 current_dir = os.path.dirname(current_file_path)
@@ -73,11 +67,16 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
 MAX_SEED = 100000
-IMAGE_BUFFER = {}
 DELIGHT = DelightingModel()
-IMAGESR_MODEL = None # ImageRealESRGAN(outscale=4)
 if os.getenv("GRADIO_APP") == "imageto3d":
     RBG_REMOVER = RembgRemover()
@@ -99,7 +98,9 @@ elif os.getenv("GRADIO_APP") == "textto3d":
         "JeffreyXiang/TRELLIS-image-large"
     )
     # PIPELINE.cuda()
-    PIPELINE_IMG_IP = build_text2img_ip_pipeline("weights/Kolors", ref_scale=0.3)
     PIPELINE_IMG = build_text2img_pipeline("weights/Kolors")
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
@@ -160,6 +161,7 @@ def render_mesh(sample, extrinsics, intrinsics, options={}, **kwargs):
     return rets
 def render_video(
     sample,
     resolution=512,
@@ -178,8 +180,6 @@ def render_video(
     render_fn = (
         render_mesh if isinstance(sample, MeshExtractResult) else render_frames
     )
-    print(torch.cuda.memory_allocated() / 1024**2, "MB 已分配")
-    print(torch.cuda.memory_reserved() / 1024**2, "MB 已预留")
     result = render_fn(
         sample,
         extrinsics,
@@ -187,21 +187,21 @@ def render_video(
         {"resolution": resolution, "bg_color": bg_color},
         **kwargs,
     )
     return result
 @spaces.GPU
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
 ) -> Image.Image:
     if isinstance(image, str):
         image = Image.open(image)
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    IMAGE_BUFFER["raw_image"] = image
     image = RBG_REMOVER(image)
     image = trellis_preprocess(image)
@@ -209,11 +209,13 @@ def preprocess_image_fn(
 @spaces.GPU
-def preprocess_sam_image_fn(image: Image.Image) -> Image.Image:
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    IMAGE_BUFFER["raw_image"] = image
     sam_image = SAM_PREDICTOR.preprocess_image(image)
     SAM_PREDICTOR.predictor.set_image(sam_image)
@@ -352,8 +354,8 @@ def image_to_3d(
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
-    IMAGE_BUFFER["seg_image"] = seg_image
     PIPELINE.cuda()
     outputs = PIPELINE.run(
         seg_image,
@@ -370,13 +372,12 @@ def image_to_3d(
         },
     )
     # Set to cpu for memory saving.
-    # PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
-    with torch.no_grad():
-        color_images = render_video(gs_model, num_frames=1)["color"]
-        normal_images = render_video(mesh_model, num_frames=1)["normal"]
     output_root = TMP_DIR
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
@@ -567,8 +568,10 @@ def extract_urdf(
     for checker in CHECKERS:
         images = image_paths
         if isinstance(checker, ImageSegChecker):
-            print("IMAGE_BUFFER", IMAGE_BUFFER.keys())
-            images = [IMAGE_BUFFER["raw_image"], IMAGE_BUFFER["seg_image"]]
         images_list.append(images)
     results = BaseChecker.validate(CHECKERS, images_list)
@@ -634,7 +637,7 @@ def text2image_fn(
     if postprocess:
         for idx in range(len(images)):
             image = images[idx]
-            images[idx] = preprocess_image_fn(image, RBG_REMOVER)
     save_paths = []
     for idx, image in enumerate(images):

 import gc
 import logging
 import os
+import shutil
+import subprocess
 import sys
 from glob import glob
 import cv2
 import gradio as gr
 import numpy as np
 import spaces
 import torch
 import trimesh
 from easydict import EasyDict as edict
 from PIL import Image
 from tqdm import tqdm
 from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
     SAMPredictor,
     trellis_preprocess,
 )
+from asset3d_gen.models.sr_model import ImageRealESRGAN
 from asset3d_gen.scripts.render_gs import entrypoint as render_gs_api
+from asset3d_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
+from asset3d_gen.scripts.text2image import (
+    build_text2img_ip_pipeline,
+    build_text2img_pipeline,
+    text2img_gen,
+)
+from asset3d_gen.utils.gpt_clients import GPT_CLIENT
 from asset3d_gen.utils.process_media import (
     filter_image_small_connected_components,
     merge_images_video,
     MeshGeoChecker,
 )
 from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
 current_file_path = os.path.abspath(__file__)
 current_dir = os.path.dirname(current_file_path)
 logger = logging.getLogger(__name__)
+os.environ["TORCH_EXTENSIONS_DIR"] = os.path.expanduser(
+    "~/.cache/torch_extensions"
+)
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
+os.environ['SPCONV_ALGO'] = 'native'
 MAX_SEED = 100000
 DELIGHT = DelightingModel()
+IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
 if os.getenv("GRADIO_APP") == "imageto3d":
     RBG_REMOVER = RembgRemover()
         "JeffreyXiang/TRELLIS-image-large"
     )
     # PIPELINE.cuda()
+    PIPELINE_IMG_IP = build_text2img_ip_pipeline(
+        "weights/Kolors", ref_scale=0.3
+    )
     PIPELINE_IMG = build_text2img_pipeline("weights/Kolors")
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
     return rets
+@spaces.GPU
 def render_video(
     sample,
     resolution=512,
     render_fn = (
         render_mesh if isinstance(sample, MeshExtractResult) else render_frames
     )
     result = render_fn(
         sample,
         extrinsics,
         {"resolution": resolution, "bg_color": bg_color},
         **kwargs,
     )
     return result
 @spaces.GPU
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
+    req: gr.Request,
 ) -> Image.Image:
     if isinstance(image, str):
         image = Image.open(image)
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    image.save(f"{TMP_DIR}/{req.session_hash}/raw_image.png")
     image = RBG_REMOVER(image)
     image = trellis_preprocess(image)
 @spaces.GPU
+def preprocess_sam_image_fn(
+    image: Image.Image, req: gr.Request
+) -> Image.Image:
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    image.save(f"{TMP_DIR}/{req.session_hash}/raw_image.png")
     sam_image = SAM_PREDICTOR.preprocess_image(image)
     SAM_PREDICTOR.predictor.set_image(sam_image)
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
+    seg_image.save(f"{TMP_DIR}/{req.session_hash}/seg_image.png")
     PIPELINE.cuda()
     outputs = PIPELINE.run(
         seg_image,
         },
     )
     # Set to cpu for memory saving.
+    PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
+    color_images = render_video(gs_model)["color"]
+    normal_images = render_video(mesh_model)["normal"]
     output_root = TMP_DIR
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
     for checker in CHECKERS:
         images = image_paths
         if isinstance(checker, ImageSegChecker):
+            images = [
+                f"{TMP_DIR}/{req.session_hash}/raw_image.png",
+                f"{TMP_DIR}/{req.session_hash}/seg_image.png",
+            ]
         images_list.append(images)
     results = BaseChecker.validate(CHECKERS, images_list)
     if postprocess:
         for idx in range(len(images)):
             image = images[idx]
+            images[idx] = preprocess_image_fn(image)
     save_paths = []
     for idx, image in enumerate(images):

requirements.txt CHANGED Viewed

@@ -1,11 +1,11 @@
-# --extra-index-url https://download.pytorch.org/whl/cu118
 --extra-index-url https://download.pytorch.org/whl/cu121
-# torch==2.1.0
-# torchaudio==2.1.0
-# torchvision==0.16.0
-# xformers==0.0.22.post7
 dataclasses_json
 easydict
 opencv-python>4.5
@@ -18,7 +18,6 @@ pymeshfix==0.17.0
 igraph==0.11.8
 pyvista==0.36.1
 openai==1.58.1
-# spconv-cu118==2.3.8
 transformers==4.42.4
 # gradio_litmodel3d==0.0.1
 gradio==5.12.0
@@ -27,7 +26,6 @@ diffusers==0.31.0
 xatlas==0.0.9
 onnxruntime==1.20.1
 tenacity==8.2.2
-# pytorch-lightning==2.1.0
 accelerate==0.33.0
 basicsr==1.4.2
 realesrgan==0.3.0
@@ -38,22 +36,8 @@ utils3d@git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c
 clip@git+https://github.com/openai/CLIP.git
 kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
 segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f
-# https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl
-# https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl
-# https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl
-# https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt21cu118-cp310-cp310-linux_x86_64.whl
-# https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
-torch==2.4.0
-torchvision==0.19.0
-pytorch-lightning==2.4.0
-spconv-cu120==2.3.6
-xformers==0.0.27.post2
-kaolin@git+https://github.com/NVIDIAGameWorks/[email protected]
-https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
-https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
-https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl

 --extra-index-url https://download.pytorch.org/whl/cu121
+torch==2.4.0
+torchvision==0.19.0
+xformers==0.0.27.post2
+pytorch-lightning==2.4.0
+spconv-cu120==2.3.6
 dataclasses_json
 easydict
 opencv-python>4.5
 igraph==0.11.8
 pyvista==0.36.1
 openai==1.58.1
 transformers==4.42.4
 # gradio_litmodel3d==0.0.1
 gradio==5.12.0
 xatlas==0.0.9
 onnxruntime==1.20.1
 tenacity==8.2.2
 accelerate==0.33.0
 basicsr==1.4.2
 realesrgan==0.3.0
 clip@git+https://github.com/openai/CLIP.git
 kolors@git+https://github.com/Kwai-Kolors/Kolors.git#egg=038818d
 segment-anything@git+https://github.com/facebookresearch/segment-anything.git#egg=dca509f
+https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/gsplat-1.5.0%2Bpt24cu121-cp310-cp310-linux_x86_64.whl
+https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl
+https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl
+https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/flash_attn-2.7.0.post2%2Bcu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+https://huggingface.co/spaces/xinjjj/ImgRoboAssetGen/blob/main/wheels/kaolin-0.16.0-cp310-cp310-linux_x86_64.whl