Spaces:

xinjjj
/

ImgRoboAssetGen

Running on Zero

App Files Community

xinjie.wang committed on Apr 14

Commit

ffe3ce4

1 Parent(s): 07a8a18

update

Browse files

Files changed (3) hide show

app.py +7 -41
asset3d_gen/models/text_model.py +1 -1
common.py +290 -57

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import os
-import shutil
 from functools import partial
 import gradio as gr
 from common import (
     MAX_SEED,
     VERSION,
-    TrellisImageTo3DPipeline,
     active_btn_by_content,
     extract_3d_representations_v2,
     extract_urdf,
@@ -15,36 +15,13 @@ from common import (
     preprocess_image_fn,
     preprocess_sam_image_fn,
     select_point,
 )
 from gradio.themes import Default
 from gradio.themes.utils.colors import slate
-from gradio_litmodel3d import LitModel3D
-from asset3d_gen.models.delight_model import DelightingModel
-from asset3d_gen.models.segment_model import RembgRemover, SAMPredictor
-from asset3d_gen.models.sr_model import ImageRealESRGAN
-from asset3d_gen.utils.gpt_clients import GPT_CLIENT
-from asset3d_gen.validators.quality_checkers import (
-    ImageAestheticChecker,
-    ImageSegChecker,
-    MeshGeoChecker,
-)
-from asset3d_gen.validators.urdf_convertor import URDFGenerator
-TMP_DIR = os.path.join(
-    os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
-)
-os.makedirs(TMP_DIR, exist_ok=True)
-def start_session(req: gr.Request) -> None:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    os.makedirs(user_dir, exist_ok=True)
-def end_session(req: gr.Request) -> None:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    if os.path.exists(user_dir):
-        shutil.rmtree(user_dir)
 with gr.Blocks(
@@ -220,7 +197,7 @@ with gr.Blocks(
                     fn=preprocess_image_fn,
                     outputs=[image_prompt],
                     run_on_click=True,
-                    examples_per_page=32,
                 )
             with gr.Row(visible=False) as single_sam_image_example:
@@ -236,7 +213,7 @@ with gr.Blocks(
                     fn=preprocess_sam_image_fn,
                     outputs=[image_prompt_sam],
                     run_on_click=True,
-                    examples_per_page=32,
                 )
         with gr.Column(scale=1):
             video_output = gr.Video(
@@ -246,7 +223,7 @@ with gr.Blocks(
                 height=300,
             )
             model_output_gs = gr.Model3D(
-                label="Gaussian Representation", height=300, interactive=False # , clear_color=[0.9, 0.9, 0.9, 1.0],
             )
             aligned_gs = gr.Textbox(visible=False)
             with gr.Row():
@@ -381,7 +358,6 @@ with gr.Blocks(
             image_prompt_sam,
             selected_points,
             fg_bg_radio,
-            # gr.State(lambda: SAM_PREDICTOR),
         ],
         [image_mask_sam, image_seg_sam],
     )
@@ -404,9 +380,6 @@ with gr.Blocks(
             ss_sampling_steps,
             slat_guidance_strength,
             slat_sampling_steps,
-            # gr.State(lambda: IMAGE_BUFFER),
-            # gr.State(lambda: PIPELINE),
-            gr.State(lambda: TMP_DIR),
             image_seg_sam,
             is_samimage,
         ],
@@ -421,9 +394,6 @@ with gr.Blocks(
         inputs=[
             output_buf,
             project_delight,
-            gr.State(lambda: TMP_DIR),
-            # gr.State(lambda: DELIGHT),
-            # gr.State(lambda: IMAGESR_MODEL),
         ],
         outputs=[
             model_output_mesh,
@@ -445,10 +415,6 @@ with gr.Blocks(
             height_range_text,
             mass_range_text,
             asset_version_text,
-            gr.State(lambda: TMP_DIR),
-            # gr.State(lambda: URDF_CONVERTOR),
-            # gr.State(lambda: IMAGE_BUFFER),
-            # gr.State(lambda: CHECKERS),
         ],
         outputs=[
             download_urdf,

 import os
+os.environ["GRADIO_APP"] = "imageto3d"
 from functools import partial
 import gradio as gr
 from common import (
     MAX_SEED,
     VERSION,
     active_btn_by_content,
     extract_3d_representations_v2,
     extract_urdf,
     preprocess_image_fn,
     preprocess_sam_image_fn,
     select_point,
+    start_session,
+    end_session,
 )
 from gradio.themes import Default
 from gradio.themes.utils.colors import slate
 with gr.Blocks(
                     fn=preprocess_image_fn,
                     outputs=[image_prompt],
                     run_on_click=True,
+                    examples_per_page=10,
                 )
             with gr.Row(visible=False) as single_sam_image_example:
                     fn=preprocess_sam_image_fn,
                     outputs=[image_prompt_sam],
                     run_on_click=True,
+                    examples_per_page=10,
                 )
         with gr.Column(scale=1):
             video_output = gr.Video(
                 height=300,
             )
             model_output_gs = gr.Model3D(
+                label="Gaussian Representation", height=300, interactive=False
             )
             aligned_gs = gr.Textbox(visible=False)
             with gr.Row():
             image_prompt_sam,
             selected_points,
             fg_bg_radio,
         ],
         [image_mask_sam, image_seg_sam],
     )
             ss_sampling_steps,
             slat_guidance_strength,
             slat_sampling_steps,
             image_seg_sam,
             is_samimage,
         ],
         inputs=[
             output_buf,
             project_delight,
         ],
         outputs=[
             model_output_mesh,
             height_range_text,
             mass_range_text,
             asset_version_text,
         ],
         outputs=[
             download_urdf,

asset3d_gen/models/text_model.py CHANGED Viewed

@@ -75,7 +75,7 @@ def build_text2img_ip_pipeline(
     pipe.set_ip_adapter_scale([ref_scale])
     pipe = pipe.to(device)
-    pipe.enable_model_cpu_offload()
     # pipe.enable_xformers_memory_efficient_attention()
     # pipe.enable_vae_slicing()

     pipe.set_ip_adapter_scale([ref_scale])
     pipe = pipe.to(device)
+    # pipe.enable_model_cpu_offload()
     # pipe.enable_xformers_memory_efficient_attention()
     # pipe.enable_vae_slicing()

common.py CHANGED Viewed

@@ -4,8 +4,9 @@ import os
 import sys
 from glob import glob
 from typing import Union
 import cv2
 import gradio as gr
 import numpy as np
 import spaces
@@ -45,6 +46,11 @@ from asset3d_gen.validators.quality_checkers import (
 )
 from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
 from asset3d_gen.utils.gpt_clients import GPT_CLIENT
 current_file_path = os.path.abspath(__file__)
 current_dir = os.path.dirname(current_file_path)
@@ -67,25 +73,68 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
-MAX_SEED = 100000
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
-RBG_REMOVER = RembgRemover()
-SAM_PREDICTOR = SAMPredictor(model_type="vit_h")
 DELIGHT = DelightingModel()
 IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-    "JeffreyXiang/TRELLIS-image-large"
-)
-# PIPELINE.cuda()
-IMAGE_BUFFER = {}
-SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
-GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
-AESTHETIC_CHECKER = ImageAestheticChecker()
-CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
-URDF_CONVERTOR = URDFGenerator(GPT_CLIENT, render_view_num=4)
 @spaces.GPU
@@ -150,8 +199,7 @@ def preprocess_image_fn(
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    if IMAGE_BUFFER is not None:
-        IMAGE_BUFFER["raw_image"] = image
     image = RBG_REMOVER(image)
     image = trellis_preprocess(image)
@@ -160,15 +208,13 @@ def preprocess_image_fn(
 @spaces.GPU
-def preprocess_sam_image_fn(
-    image: Image.Image, buffer: dict, model: SAMPredictor
-) -> Image.Image:
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    buffer["raw_image"] = image
-    sam_image = model.preprocess_image(image)
-    model.predictor.set_image(sam_image)
     return sam_image
@@ -254,7 +300,6 @@ def select_point(
     image: np.ndarray,
     sel_pix: list,
     point_type: str,
-    model: SAMPredictor,
     evt: gr.SelectData,
 ):
     if point_type == "foreground_point":
@@ -264,8 +309,8 @@ def select_point(
     else:
         sel_pix.append((evt.index, 1))  # default foreground_point
-    masks = model.generate_masks(image, sel_pix)
-    seg_image = model.get_segmented_image(image, masks)
     for point, label in sel_pix:
         color = (255, 0, 0) if label == 0 else (0, 255, 0)
@@ -292,9 +337,6 @@ def image_to_3d(
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
-    buffer: dict,
-    pipeline: TrellisImageTo3DPipeline,
-    output_root: str,
     sam_image: Image.Image = None,
     is_sam_image: bool = False,
     req: gr.Request = None,
@@ -309,10 +351,10 @@ def image_to_3d(
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
-    buffer["seg_image"] = seg_image
-    pipeline.cuda()
-    outputs = pipeline.run(
         seg_image,
         seed=seed,
         formats=["gaussian", "mesh"],
@@ -327,12 +369,13 @@ def image_to_3d(
         },
     )
     # Set to cpu for memory saving.
-    pipeline.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
     color_images = render_video(gs_model)["color"]
     normal_images = render_video(mesh_model)["normal"]
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
     video_path = os.path.join(output_root, "gs_mesh.mp4")
@@ -347,9 +390,10 @@ def image_to_3d(
 @spaces.GPU
 def extract_3d_representations(
-    state: dict, enable_delight: bool, output_root: str, req: gr.Request
 ):
-    user_dir = os.path.join(output_root, str(req.session_hash))
     gs_model, mesh_model = unpack_state(state)
     mesh = postprocessing_utils.to_glb(
@@ -360,7 +404,7 @@ def extract_3d_representations(
         verbose=True,
     )
     filename = "sample"
-    gs_path = os.path.join(user_dir, f"{filename}_gs.ply")
     gs_model.save_ply(gs_path)
     # Rotate mesh and GS by 90 degrees around Z-axis.
@@ -378,9 +422,9 @@ def extract_3d_representations(
     )
     mesh.vertices = mesh.vertices @ np.array(rot_matrix)
-    mesh_obj_path = os.path.join(user_dir, f"{filename}.obj")
     mesh.export(mesh_obj_path)
-    mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
     mesh.export(mesh_glb_path)
     torch.cuda.empty_cache()
@@ -392,11 +436,9 @@ def extract_3d_representations(
 def extract_3d_representations_v2(
     state: dict,
     enable_delight: bool,
-    output_root: str,
-    delight_model: DelightingModel,
-    sr_model: Union[ImageRealESRGAN, ImageStableSR],
     req: gr.Request,
 ):
     user_dir = os.path.join(output_root, str(req.session_hash))
     gs_model, mesh_model = unpack_state(state)
@@ -432,8 +474,8 @@ def extract_3d_representations_v2(
     mesh.export(mesh_obj_path)
     mesh = backproject_api(
-        delight_model=delight_model,
-        imagesr_model=sr_model,
         color_path=color_path,
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
@@ -457,16 +499,14 @@ def extract_urdf(
     height_range_text: str,
     mass_range_text: str,
     asset_version_text: str,
-    output_root: str,
-    urdf_convertor: URDFGenerator,
-    buffer: dict,
-    checkers: list[BaseChecker],
     req: gr.Request = None,
 ):
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
     # Convert to URDF and recover attrs by gpt4o
     filename = "sample"
     asset_attrs = {
         "version": VERSION,
         "gs_model": f"{urdf_convertor.output_mesh_dir}/{filename}_gs.ply",
@@ -522,13 +562,13 @@ def extract_urdf(
     image_dir = f"{output_root}/URDF_{filename}/{urdf_convertor.output_render_dir}/image_color"  # noqa
     image_paths = glob(f"{image_dir}/*.png")
     images_list = []
-    for checker in checkers:
         images = image_paths
         if isinstance(checker, ImageSegChecker):
-            images = [buffer["raw_image"], buffer["seg_image"]]
         images_list.append(images)
-    results = BaseChecker.validate(checkers, images_list)
     urdf_convertor.add_quality_tag(urdf_path, results)
     # Zip urdf files
@@ -559,11 +599,7 @@ def extract_urdf(
 @spaces.GPU
 def text2image_fn(
     prompt: str,
-    output_root: str,
     guidance_scale: float,
-    model_ip: StableDiffusionXLPipelineIP,
-    model_img: StableDiffusionXLPipeline,
-    bg_model: RembgRemover,
     infer_step: int = 50,
     ip_image: Image.Image | str = None,
     ip_adapt_scale: float = 0.3,
@@ -574,11 +610,12 @@ def text2image_fn(
 ):
     if isinstance(image_wh, int):
         image_wh = (image_wh, image_wh)
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
         os.makedirs(output_root, exist_ok=True)
-    pipeline = model_img if ip_image is None else model_ip
     if ip_image is not None:
         pipeline.set_ip_adapter_scale([ip_adapt_scale])
@@ -594,7 +631,7 @@ def text2image_fn(
     if postprocess:
         for idx in range(len(images)):
             image = images[idx]
-            images[idx] = preprocess_image_fn(image, bg_model)
     save_paths = []
     for idx, image in enumerate(images):
@@ -608,3 +645,199 @@ def text2image_fn(
     torch.cuda.empty_cache()
     return save_paths + save_paths

 import sys
 from glob import glob
 from typing import Union
+import shutil
 import cv2
+import subprocess
 import gradio as gr
 import numpy as np
 import spaces
 )
 from asset3d_gen.validators.urdf_convertor import URDFGenerator, zip_files
 from asset3d_gen.utils.gpt_clients import GPT_CLIENT
+from asset3d_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
+from asset3d_gen.scripts.text2image import (
+    build_text2img_ip_pipeline,
+    build_text2img_pipeline,
+)
 current_file_path = os.path.abspath(__file__)
 current_dir = os.path.dirname(current_file_path)
 logger = logging.getLogger(__name__)
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
+MAX_SEED = 100000
+IMAGE_BUFFER = {}
 DELIGHT = DelightingModel()
 IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
+if os.getenv("GRADIO_APP") == "imageto3d":
+    RBG_REMOVER = RembgRemover()
+    SAM_PREDICTOR = SAMPredictor(model_type="vit_h")
+    PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+        "JeffreyXiang/TRELLIS-image-large"
+    )
+    # PIPELINE.cuda()
+    SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
+    GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
+    AESTHETIC_CHECKER = ImageAestheticChecker()
+    CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
+    TMP_DIR = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
+    )
+elif os.getenv("GRADIO_APP") == "textto3d":
+    RBG_REMOVER = RembgRemover()
+    PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+        "JeffreyXiang/TRELLIS-image-large"
+    )
+    # PIPELINE.cuda()
+    PIPELINE_IMG_IP = build_text2img_ip_pipeline("weights/Kolors", ref_scale=0.3)
+    PIPELINE_IMG = build_text2img_pipeline("weights/Kolors")
+    SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
+    GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
+    AESTHETIC_CHECKER = ImageAestheticChecker()
+    CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
+    TMP_DIR = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
+    )
+elif os.getenv("GRADIO_APP") == "texture_edit":
+    PIPELINE_IP = build_texture_gen_pipe(
+        base_ckpt_dir="./weights",
+        ip_adapt_scale=0.7,
+        device="cuda",
+    )
+    PIPELINE = build_texture_gen_pipe(
+        base_ckpt_dir="./weights",
+        ip_adapt_scale=0,
+        device="cuda",
+    )
+    TMP_DIR = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
+    )
+os.makedirs(TMP_DIR, exist_ok=True)
+def start_session(req: gr.Request) -> None:
+    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    os.makedirs(user_dir, exist_ok=True)
+def end_session(req: gr.Request) -> None:
+    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    if os.path.exists(user_dir):
+        shutil.rmtree(user_dir)
 @spaces.GPU
     elif isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    IMAGE_BUFFER["raw_image"] = image
     image = RBG_REMOVER(image)
     image = trellis_preprocess(image)
 @spaces.GPU
+def preprocess_sam_image_fn(image: Image.Image) -> Image.Image:
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    IMAGE_BUFFER["raw_image"] = image
+    sam_image = SAM_PREDICTOR.preprocess_image(image)
+    SAM_PREDICTOR.predictor.set_image(sam_image)
     return sam_image
     image: np.ndarray,
     sel_pix: list,
     point_type: str,
     evt: gr.SelectData,
 ):
     if point_type == "foreground_point":
     else:
         sel_pix.append((evt.index, 1))  # default foreground_point
+    masks = SAM_PREDICTOR.generate_masks(image, sel_pix)
+    seg_image = SAM_PREDICTOR.get_segmented_image(image, masks)
     for point, label in sel_pix:
         color = (255, 0, 0) if label == 0 else (0, 255, 0)
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
     sam_image: Image.Image = None,
     is_sam_image: bool = False,
     req: gr.Request = None,
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
+    IMAGE_BUFFER["seg_image"] = seg_image
+    PIPELINE.cuda()
+    outputs = PIPELINE.run(
         seg_image,
         seed=seed,
         formats=["gaussian", "mesh"],
         },
     )
     # Set to cpu for memory saving.
+    PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
     color_images = render_video(gs_model)["color"]
     normal_images = render_video(mesh_model)["normal"]
+    output_root = TMP_DIR
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
     video_path = os.path.join(output_root, "gs_mesh.mp4")
 @spaces.GPU
 def extract_3d_representations(
+    state: dict, enable_delight: bool, req: gr.Request
 ):
+    output_root = TMP_DIR
+    output_root = os.path.join(output_root, str(req.session_hash))
     gs_model, mesh_model = unpack_state(state)
     mesh = postprocessing_utils.to_glb(
         verbose=True,
     )
     filename = "sample"
+    gs_path = os.path.join(output_root, f"{filename}_gs.ply")
     gs_model.save_ply(gs_path)
     # Rotate mesh and GS by 90 degrees around Z-axis.
     )
     mesh.vertices = mesh.vertices @ np.array(rot_matrix)
+    mesh_obj_path = os.path.join(output_root, f"{filename}.obj")
     mesh.export(mesh_obj_path)
+    mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
     mesh.export(mesh_glb_path)
     torch.cuda.empty_cache()
 def extract_3d_representations_v2(
     state: dict,
     enable_delight: bool,
     req: gr.Request,
 ):
+    output_root = TMP_DIR
     user_dir = os.path.join(output_root, str(req.session_hash))
     gs_model, mesh_model = unpack_state(state)
     mesh.export(mesh_obj_path)
     mesh = backproject_api(
+        delight_model=DELIGHT,
+        imagesr_model=IMAGESR_MODEL,
         color_path=color_path,
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
     height_range_text: str,
     mass_range_text: str,
     asset_version_text: str,
     req: gr.Request = None,
 ):
+    output_root = TMP_DIR
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
     # Convert to URDF and recover attrs by gpt4o
     filename = "sample"
+    urdf_convertor = URDFGenerator(GPT_CLIENT, render_view_num=4)
     asset_attrs = {
         "version": VERSION,
         "gs_model": f"{urdf_convertor.output_mesh_dir}/{filename}_gs.ply",
     image_dir = f"{output_root}/URDF_{filename}/{urdf_convertor.output_render_dir}/image_color"  # noqa
     image_paths = glob(f"{image_dir}/*.png")
     images_list = []
+    for checker in CHECKERS:
         images = image_paths
         if isinstance(checker, ImageSegChecker):
+            images = [IMAGE_BUFFER["raw_image"], IMAGE_BUFFER["seg_image"]]
         images_list.append(images)
+    results = BaseChecker.validate(CHECKERS, images_list)
     urdf_convertor.add_quality_tag(urdf_path, results)
     # Zip urdf files
 @spaces.GPU
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
     infer_step: int = 50,
     ip_image: Image.Image | str = None,
     ip_adapt_scale: float = 0.3,
 ):
     if isinstance(image_wh, int):
         image_wh = (image_wh, image_wh)
+    output_root = TMP_DIR
     if req is not None:
         output_root = os.path.join(output_root, str(req.session_hash))
         os.makedirs(output_root, exist_ok=True)
+    pipeline = PIPELINE_IMG if ip_image is None else PIPELINE_IMG_IP
     if ip_image is not None:
         pipeline.set_ip_adapter_scale([ip_adapt_scale])
     if postprocess:
         for idx in range(len(images)):
             image = images[idx]
+            images[idx] = preprocess_image_fn(image, RBG_REMOVER)
     save_paths = []
     for idx, image in enumerate(images):
     torch.cuda.empty_cache()
     return save_paths + save_paths
+@spaces.GPU
+def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    command = [
+        "drender-cli",
+        "--mesh_path",
+        mesh_path,
+        "--output_root",
+        f"{output_root}/condition",
+        "--uuid",
+        f"{uuid}",
+    ]
+    _ = subprocess.run(
+        command, capture_output=True, text=True, encoding="utf-8"
+    )
+    gc.collect()
+    torch.cuda.empty_cache()
+    return None, None, None
+@spaces.GPU
+def generate_texture_mvimages(
+    prompt: str,
+    controlnet_cond_scale: float = 0.55,
+    guidance_scale: float = 9,
+    strength: float = 0.9,
+    num_inference_steps: int = 50,
+    seed: int = 0,
+    ip_adapt_scale: float = 0,
+    ip_img_path: str = None,
+    uid: str = "sample",
+    sub_idxs: tuple[tuple[int]] = ((0, 1, 2), (3, 4, 5)),
+    req: gr.Request = None,
+) -> list[str]:
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    use_ip_adapter = True if ip_img_path and ip_adapt_scale > 0 else False
+    PIPELINE_IP.set_ip_adapter_scale([ip_adapt_scale])
+    img_save_paths = infer_pipe(
+        index_file=f"{output_root}/condition/index.json",
+        controlnet_cond_scale=controlnet_cond_scale,
+        guidance_scale=guidance_scale,
+        strength=strength,
+        num_inference_steps=num_inference_steps,
+        ip_adapt_scale=ip_adapt_scale,
+        ip_img_path=ip_img_path,
+        uid=uid,
+        prompt=prompt,
+        save_dir=f"{output_root}/multi_view",
+        sub_idxs=sub_idxs,
+        pipeline=PIPELINE_IP if use_ip_adapter else PIPELINE,
+        seed=seed,
+    )
+    gc.collect()
+    torch.cuda.empty_cache()
+    return img_save_paths + img_save_paths
+@spaces.GPU
+def backproject_texture(
+    mesh_path: str,
+    input_image: str,
+    texture_size: int,
+    uuid: str = "sample",
+    req: gr.Request = None,
+) -> str:
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    output_dir = os.path.join(output_root, "texture_mesh")
+    os.makedirs(output_dir, exist_ok=True)
+    command = [
+        "backproject-cli",
+        "--mesh_path",
+        mesh_path,
+        "--input_image",
+        input_image,
+        "--output_root",
+        output_dir,
+        "--uuid",
+        f"{uuid}",
+        "--texture_size",
+        str(texture_size),
+        "--skip_fix_mesh",
+    ]
+    _ = subprocess.run(
+        command, capture_output=True, text=True, encoding="utf-8"
+    )
+    output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")
+    output_glb_mesh = os.path.join(output_dir, f"{uuid}.glb")
+    _ = trimesh.load(output_obj_mesh).export(output_glb_mesh)
+    zip_file = zip_files(
+        input_paths=[
+            output_glb_mesh,
+            output_obj_mesh,
+            os.path.join(output_dir, "material.mtl"),
+            os.path.join(output_dir, "material_0.png"),
+        ],
+        output_zip=os.path.join(output_dir, f"{uuid}.zip"),
+    )
+    gc.collect()
+    torch.cuda.empty_cache()
+    return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
+def backproject_texture_v2(
+    mesh_path: str,
+    input_image: str,
+    texture_size: int,
+    enable_delight: bool = True,
+    fix_mesh: bool = False,
+    uuid: str = "sample",
+    req: gr.Request = None,
+) -> str:
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    output_dir = os.path.join(output_root, "texture_mesh")
+    os.makedirs(output_dir, exist_ok=True)
+    textured_mesh = backproject_api(
+        delight_model=DELIGHT,
+        imagesr_model=IMAGESR_MODEL,
+        color_path=input_image,
+        mesh_path=mesh_path,
+        output_path=f"{output_dir}/{uuid}.obj",
+        skip_fix_mesh=not fix_mesh,
+        delight=enable_delight,
+        texture_wh=[texture_size, texture_size],
+    )
+    output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")
+    output_glb_mesh = os.path.join(output_dir, f"{uuid}.glb")
+    _ = textured_mesh.export(output_glb_mesh)
+    zip_file = zip_files(
+        input_paths=[
+            output_glb_mesh,
+            output_obj_mesh,
+            os.path.join(output_dir, "material.mtl"),
+            os.path.join(output_dir, "material_0.png"),
+        ],
+        output_zip=os.path.join(output_dir, f"{uuid}.zip"),
+    )
+    gc.collect()
+    torch.cuda.empty_cache()
+    return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
+def render_result_video(
+    mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
+) -> str:
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    output_dir = os.path.join(output_root, "texture_mesh")
+    command = [
+        "drender-cli",
+        "--mesh_path",
+        mesh_path,
+        "--output_root",
+        output_dir,
+        "--num_images",
+        "90",
+        "--elevation",
+        "20",
+        "--with_mtl",
+        "--pbr_light_factor",
+        "1.",
+        "--uuid",
+        f"{uuid}",
+        "--gen_color_mp4",
+        "--gen_glonormal_mp4",
+        "--distance",
+        "5.5",
+        "--resolution_hw",
+        f"{video_size}",
+        f"{video_size}",
+    ]
+    _ = subprocess.run(
+        command, capture_output=True, text=True, encoding="utf-8"
+    )
+    gc.collect()
+    torch.cuda.empty_cache()
+    return f"{output_dir}/color.mp4"