Commit · 05d00b7
1 Parent(s): ac36933
init

This view is limited to 50 files because the commit contains too many changes.
- DockerFile → Dockerfile +2 -0
- README.md +4 -22
- app.py +10 -74
- app_3d.py +0 -21
- app_canny.py +0 -83
- app_matnet.py +0 -83
- app_sd.py +0 -154
- app_texnet.py +0 -259
- cv_utils.py +0 -17
- depth_estimator.py +0 -25
- examples/bunny/frame_0001.png +0 -3
- examples/bunny/mesh.obj +0 -0
- examples/bunny/uv_normal.png +0 -3
- examples/fighter/frame_0001.png +0 -3
- examples/fighter/mesh.obj +0 -0
- examples/fighter/uv_normal.png +0 -3
- examples/highheel/frame_0001.png +0 -3
- examples/highheel/mesh.obj +0 -0
- examples/highheel/uv_normal.png +0 -3
- examples/monkey/frame_0001.png +0 -3
- examples/monkey/mesh.obj +0 -0
- examples/monkey/uv_normal.png +0 -3
- examples/tank/frame_0001.png +0 -3
- examples/tank/mesh.obj +0 -3
- examples/tank/uv_normal.png +0 -3
- examples/tshirt/frame_0001.png +0 -3
- examples/tshirt/mesh.obj +0 -3
- examples/tshirt/uv_normal.png +0 -3
- image_segmentor.py +0 -33
- install.sh +0 -18
- model.py +0 -959
- pre-requirements.txt +0 -9
- preprocessor.py +0 -120
- push_dataset.py +0 -9
- requirements.txt +0 -9
- rgb2x/generate_blend.py +0 -142
- rgb2x/gradio_demo_rgb2x.py +0 -157
- rgb2x/load_image.py +0 -119
- rgb2x/pipeline_rgb2x.py +0 -821
- run.sh +5 -0
- settings.py +0 -23
- text2tex/lib/__init__.py +0 -0
- text2tex/lib/camera_helper.py +0 -231
- text2tex/lib/constants.py +0 -648
- text2tex/lib/diffusion_helper.py +0 -189
- text2tex/lib/io_helper.py +0 -78
- text2tex/lib/mesh_helper.py +0 -148
- text2tex/lib/projection_helper.py +0 -464
- text2tex/lib/render_helper.py +0 -108
- text2tex/lib/shading_helper.py +0 -45
DockerFile → Dockerfile
RENAMED
@@ -8,8 +8,10 @@ RUN conda env create -f /code/environment.yml
 
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
+
 # Switch to the "user" user
 USER user
+
 # Set home to the user's home directory
 ENV HOME=/home/user \
     PYTHONPATH=$HOME/app \

README.md
CHANGED
@@ -1,28 +1,10 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Gradio Conda Template
+emoji: 🐨
+colorFrom: blue
+colorTo: indigo
 sdk: docker
 pinned: false
-license: mit
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-
-
-## setup locally
-conda create -n matgen python=3.11
-conda activate matgen
-pip install diffusers["torch"] transformers accelerate xformers
-pip install gradio
-pip install controlnet-aux
-
-## local authen
-huggingface-cli login
-
-## on using Huggingface ZeroGPU
-need to import spaces and the corresponding decorator
-https://huggingface.co/docs/hub/spaces-zerogpu
-
-also, check the usage of controlnet over zerogpu here: https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL/blob/main/app.py

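The ZeroGPU notes removed above only say to "import spaces and the corresponding decorator". As a minimal, hedged sketch of that pattern (the pipeline, model id, and function name below are placeholders for illustration, not part of this commit), the decorator wraps the GPU-bound entry point so ZeroGPU attaches a GPU only while it runs:

import spaces  # available inside Hugging Face Spaces; see https://huggingface.co/docs/hub/spaces-zerogpu
import torch
from diffusers import StableDiffusionPipeline

# Placeholder model id; loading happens at startup, GPU is attached only inside the decorated call.
pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

@spaces.GPU  # ZeroGPU allocates a GPU only for the duration of generate()
def generate(prompt: str):
    return pipe(prompt).images[0]
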
app.py
CHANGED
@@ -1,80 +1,16 @@
-#!/usr/bin/env python
-
 import gradio as gr
-import torch
-
-import sys
-pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
-version_str="".join([
-    f"py3{sys.version_info.minor}_cu",
-    torch.version.cuda.replace(".",""),
-    f"_pyt{pyt_version_str}"
-])
-print(f"Using version: {version_str}") # used to locate pytorch3d version in the requirements.txt for huggingface
-
-
-from app_canny import create_demo as create_demo_canny
-from app_texnet import create_demo as create_demo_texnet
-
-from model import Model
-from settings import ALLOW_CHANGING_BASE_MODEL, DEFAULT_MODEL_ID, SHOW_DUPLICATE_BUTTON
 
-DESCRIPTION = "# Material Authoring Demo v0.3"
 
-
-
+def update(name):
+    return f"Welcome to Gradio, {name}!"
 
-# model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="Canny")
-model = Model(base_model_id=DEFAULT_MODEL_ID, task_name="texnet")
 
 with gr.Blocks() as demo:
-    gr.Markdown(
-        gr.
-
-
-
-    )
-
-
-    with gr.Tab("Texnet+Matnet"):
-        create_demo_texnet(model.process_texnet)
-
-    with gr.Accordion(label="Base model", open=False):
-        with gr.Row():
-            with gr.Column(scale=5):
-                current_base_model = gr.Text(label="Current base model")
-            with gr.Column(scale=1):
-                check_base_model_button = gr.Button("Check current base model")
-        with gr.Row():
-            with gr.Column(scale=5):
-                new_base_model_id = gr.Text(
-                    label="New base model",
-                    max_lines=1,
-                    placeholder="stable-diffusion-v1-5/stable-diffusion-v1-5",
-                    info="The base model must be compatible with Stable Diffusion v1.5.",
-                    interactive=ALLOW_CHANGING_BASE_MODEL,
-                )
-            with gr.Column(scale=1):
-                change_base_model_button = gr.Button("Change base model", interactive=ALLOW_CHANGING_BASE_MODEL)
-        if not ALLOW_CHANGING_BASE_MODEL:
-            gr.Markdown(
-                """The base model is not allowed to be changed in this Space so as not to slow down the demo, but it can be changed if you duplicate the Space."""
-            )
-
-    check_base_model_button.click(
-        fn=lambda: model.base_model_id,
-        outputs=current_base_model,
-        queue=False,
-        api_name="check_base_model",
-    )
-    gr.on(
-        triggers=[new_base_model_id.submit, change_base_model_button.click],
-        fn=model.set_base_model,
-        inputs=new_base_model_id,
-        outputs=current_base_model,
-        api_name=False,
-        concurrency_id="main",
-    )
-
-if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    gr.Markdown("Start typing below and then click **Run** to see the output.")
+    with gr.Row():
+        inp = gr.Textbox(placeholder="What is your name?")
+        out = gr.Textbox()
+    btn = gr.Button("Run")
+    btn.click(fn=update, inputs=inp, outputs=out)
+
+demo.launch()

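The replacement app.py ends with a bare demo.launch(). Since the Space runs with sdk: docker (see the README diff above), the launch call usually has to bind the address and port the container exposes; a one-line sketch of such a launch call, where the host, port, and queue size are assumptions rather than values from this commit:

demo.queue(max_size=20).launch(server_name="0.0.0.0", server_port=7860)
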
app_3d.py
DELETED
@@ -1,21 +0,0 @@
-import gradio as gr
-import os
-
-def load_mesh(mesh_file_name):
-    return mesh_file_name
-
-demo = gr.Interface(
-    fn=load_mesh,
-    inputs=gr.Model3D(),
-    outputs=gr.Model3D(
-        clear_color=(255.0, 0.0, 0.0, 0.0), label="3D Model", display_mode="wireframe"),
-    examples=[
-        [os.path.join(os.path.dirname(__file__), "examples/bunny/mesh.obj")],
-        [os.path.join(os.path.dirname(__file__), "examples/monkey/mesh.obj")],
-        [os.path.join(os.path.dirname(__file__), "examples/Bunny.obj")],
-    ],
-    cache_examples=True
-)
-
-if __name__ == "__main__":
-    demo.launch()

app_canny.py
DELETED
@@ -1,83 +0,0 @@
-#!/usr/bin/env python
-
-import gradio as gr
-
-from settings import (
-    DEFAULT_IMAGE_RESOLUTION,
-    DEFAULT_NUM_IMAGES,
-    MAX_IMAGE_RESOLUTION,
-    MAX_NUM_IMAGES,
-    MAX_SEED,
-)
-from utils import randomize_seed_fn
-
-
-def create_demo(process):
-    with gr.Blocks() as demo:
-        with gr.Row():
-            with gr.Column():
-                image = gr.Image()
-                prompt = gr.Textbox(label="Prompt", submit_btn=True)
-                with gr.Accordion("Advanced options", open=False):
-                    num_samples = gr.Slider(
-                        label="Number of images", minimum=1, maximum=MAX_NUM_IMAGES, value=DEFAULT_NUM_IMAGES, step=1
-                    )
-                    image_resolution = gr.Slider(
-                        label="Image resolution",
-                        minimum=256,
-                        maximum=MAX_IMAGE_RESOLUTION,
-                        value=DEFAULT_IMAGE_RESOLUTION,
-                        step=256,
-                    )
-                    canny_low_threshold = gr.Slider(
-                        label="Canny low threshold", minimum=1, maximum=255, value=100, step=1
-                    )
-                    canny_high_threshold = gr.Slider(
-                        label="Canny high threshold", minimum=1, maximum=255, value=200, step=1
-                    )
-                    num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=20, step=1)
-                    guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                    a_prompt = gr.Textbox(label="Additional prompt", value="best quality, extremely detailed")
-                    n_prompt = gr.Textbox(
-                        label="Negative prompt",
-                        value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
-                    )
-            with gr.Column():
-                result = gr.Gallery(label="Output", show_label=False, columns=2, object_fit="scale-down")
-        inputs = [
-            image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-            canny_low_threshold,
-            canny_high_threshold,
-        ]
-        prompt.submit(
-            fn=randomize_seed_fn,
-            inputs=[seed, randomize_seed],
-            outputs=seed,
-            queue=False,
-            api_name=False,
-        ).then(
-            fn=process,
-            inputs=inputs,
-            outputs=result,
-            api_name="canny",
-            concurrency_id="main",
-        )
-    return demo
-
-
-if __name__ == "__main__":
-    from model import Model
-
-    model = Model(task_name="Canny")
-    demo = create_demo(model.process_canny)
-    demo.queue().launch()

app_matnet.py
DELETED
@@ -1,83 +0,0 @@
(The 83 deleted lines of app_matnet.py are identical, line for line, to the deleted app_canny.py shown above, including the api_name="canny" event registration and the Model(task_name="Canny") / model.process_canny usage in the __main__ block.)

app_sd.py
DELETED
@@ -1,154 +0,0 @@
-import gradio as gr
-import numpy as np
-import random
-
-import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
-import torch
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
-
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-
-
-@spaces.GPU #[uncomment to use ZeroGPU]
-def infer(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    generator = torch.Generator().manual_seed(seed)
-
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-
-    return image, seed
-
-
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
-
-        with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-
-            run_button = gr.Button("Run", scale=0, variant="primary")
-
-        result = gr.Image(label="Result", show_label=False)
-
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, # Replace with defaults that work for your model
-                )
-
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024, # Replace with defaults that work for your model
-                )
-
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0, # Replace with defaults that work for your model
-                )
-
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2, # Replace with defaults that work for your model
-                )
-
-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result, seed],
-    )
-
-if __name__ == "__main__":
-    demo.launch()

app_texnet.py
DELETED
@@ -1,259 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import shutil
-import tempfile
-import gradio as gr
-from PIL import Image
-import numpy as np
-
-from settings import (
-    DEFAULT_IMAGE_RESOLUTION,
-    DEFAULT_NUM_IMAGES,
-    MAX_IMAGE_RESOLUTION,
-    MAX_NUM_IMAGES,
-    MAX_SEED,
-)
-from utils import randomize_seed_fn
-
-# ---- helper to build a quick textured copy of the mesh ---------------
-def apply_texture(src_mesh:str, texture:str, tag:str)->str:
-    """
-    Writes a copy of `src_mesh` and tiny .mtl that points to `texture`.
-    Returns the new OBJ/GLB path for viewing.
-    """
-    tmp_dir = tempfile.mkdtemp()
-    mesh_copy = os.path.join(tmp_dir, f"{tag}.obj")
-    mtl_name = f"{tag}.mtl"
-
-    # copy geometry
-    shutil.copy(src_mesh, mesh_copy)
-
-    # write minimal MTL
-    with open(os.path.join(tmp_dir, mtl_name), "w") as f:
-        f.write(f"newmtl material_0\nmap_Kd {os.path.basename(texture)}\n")
-
-    # ensure texture lives next to OBJ
-    shutil.copy(texture, os.path.join(tmp_dir, os.path.basename(texture)))
-
-    # patch OBJ to reference our new MTL
-    with open(mesh_copy, "r+") as f:
-        lines = f.readlines()
-        if not lines[0].startswith("mtllib"):
-            lines.insert(0, f"mtllib {mtl_name}\n")
-            f.seek(0); f.writelines(lines)
-
-    return mesh_copy
-
-def image_to_temp_path(img_like, tag, out_dir=None):
-    """
-    Convert various image-like objects (str, PIL.Image, list, tuple) to temp PNG path.
-    Returns the path to the saved image file.
-    """
-    # Handle tuple or list input
-    if isinstance(img_like, (list, tuple)):
-        if len(img_like) == 0:
-            raise ValueError("Empty image list/tuple.")
-        img_like = img_like[0]
-
-    # If it's already a file path
-    if isinstance(img_like, str):
-        return img_like
-
-    # If it's a PIL Image
-    if isinstance(img_like, Image.Image):
-        temp_path = os.path.join(tempfile.mkdtemp() if out_dir is None else out_dir, f"{tag}.png")
-        os.makedirs(os.path.dirname(temp_path), exist_ok=True)
-        img_like.save(temp_path)
-        return temp_path
-
-    # if it's numpy array
-    if isinstance(img_like, np.ndarray):
-        temp_path = os.path.join(tempfile.mkdtemp() if out_dir is None else out_dir, f"{tag}.png")
-        os.makedirs(os.path.dirname(temp_path), exist_ok=True)
-        img_like = Image.fromarray(img_like)
-        img_like.save(temp_path)
-        return temp_path
-
-    raise ValueError(f"Expected PIL.Image, str, list, or tuple — got {type(img_like)}")
-
-def show_mesh(which, mesh, inp, coarse, fine):
-    """Switch the displayed texture based on dropdown change."""
-    print()
-    tex_map = {
-        "Input": image_to_temp_path(inp, "input"),
-        "Coarse": coarse[0] if isinstance(coarse, tuple) else coarse,
-        "Fine": fine[0] if isinstance(fine, tuple) else fine,
-    }
-    texture_path = tex_map[which]
-    return apply_texture(mesh, texture_path, which.lower())
-# ----------------------------------------------------------------------
-
-
-def create_demo(process):
-    with gr.Blocks() as demo:
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("## Select preset from the example list, and modify the prompt accordingly")
-                with gr.Row():
-                    name = gr.Textbox(label="Name", interactive=False, visible=False)
-                    representative = gr.Image(label="Geometry", interactive=False)
-                    image = gr.Image(label="UV Normal", interactive=False)
-                prompt = gr.Textbox(label="Prompt", submit_btn=True)
-                with gr.Accordion("Advanced options", open=False):
-                    num_samples = gr.Slider(
-                        label="Number of images", minimum=1, maximum=MAX_NUM_IMAGES, value=DEFAULT_NUM_IMAGES, step=1
-                    )
-                    image_resolution = gr.Slider(
-                        label="Image resolution",
-                        minimum=256,
-                        maximum=MAX_IMAGE_RESOLUTION,
-                        value=DEFAULT_IMAGE_RESOLUTION,
-                        step=256,
-                    )
-                    num_steps = gr.Slider(label="Number of steps", minimum=1, maximum=100, value=10, step=1)
-                    guidance_scale = gr.Slider(label="Guidance scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
-                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-                    a_prompt = gr.Textbox(label="Additional prompt", value="best quality, extremely detailed")
-                    n_prompt = gr.Textbox(
-                        label="Negative prompt",
-                        value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
-                    )
-            with gr.Column():
-                # 2x2 grid of images for the output textures
-                gr.Markdown("### Output BRDF")
-                with gr.Row():
-                    base_color = gr.Gallery(label="Base Color", show_label=True, columns=1, object_fit="scale-down")
-                    normal = gr.Gallery(label="Displacement Map", show_label=True, columns=1, object_fit="scale-down")
-                with gr.Row():
-                    roughness = gr.Gallery(label="Roughness Map", show_label=True, columns=1, object_fit="scale-down")
-                    metallic = gr.Gallery(label="Metallic Map", show_label=True, columns=1, object_fit="scale-down")
-
-                gr.Markdown("### Download Packed Blender Files for 3D Visualization")
-                out_blender_path = gr.File(label="Generated Blender File", file_types=[".blend"])
-
-        inputs = [
-            name, # Name of the object
-            representative, # Geometry mesh
-            image,
-            prompt,
-            a_prompt,
-            n_prompt,
-            num_samples,
-            image_resolution,
-            num_steps,
-            guidance_scale,
-            seed,
-        ]
-
-        # first call → run diffusion / texture network
-        prompt.submit(
-            fn=randomize_seed_fn,
-            inputs=[seed, randomize_seed],
-            outputs=seed,
-            queue=False,
-            api_name=False,
-        ).then(
-            fn=process,
-            inputs=inputs,
-            outputs=[base_color, normal, roughness, metallic, out_blender_path],
-            api_name="canny",
-            concurrency_id="main",
-        )
-
-        gr.Examples(
-            fn=process,
-            inputs=inputs,
-            outputs=[base_color, normal, roughness, metallic],
-            examples=[
-                [
-                    "bunny",
-                    "examples/bunny/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/bunny/uv_normal/fused.png
-                    "examples/bunny/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/bunny/uv_normal/fused.png
-                    "feather",
-                    a_prompt.value,
-                    n_prompt.value,
-                    num_samples.value,
-                    image_resolution.value,
-                    num_steps.value,
-                    guidance_scale.value,
-                    seed.value,
-                ],
-                [
-                    "monkey",
-                    "examples/monkey/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "examples/monkey/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "wood",
-                    a_prompt.value,
-                    n_prompt.value,
-                    num_samples.value,
-                    image_resolution.value,
-                    num_steps.value,
-                    guidance_scale.value,
-                    seed.value,
-                ],
-                [
-                    "tshirt",
-                    "examples/tshirt/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "examples/tshirt/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "wood",
-                    a_prompt.value,
-                    n_prompt.value,
-                    num_samples.value,
-                    image_resolution.value,
-                    num_steps.value,
-                    guidance_scale.value,
-                    seed.value,
-                ],
-                # [
-                #     "highheel",
-                #     "examples/highheel/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                #     "examples/highheel/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                #     "wood",
-                #     a_prompt.value,
-                #     n_prompt.value,
-                #     num_samples.value,
-                #     image_resolution.value,
-                #     num_steps.value,
-                #     guidance_scale.value,
-                #     seed.value,
-                # ],
-                [
-                    "tank",
-                    "examples/tank/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "examples/tank/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "wood",
-                    a_prompt.value,
-                    n_prompt.value,
-                    num_samples.value,
-                    image_resolution.value,
-                    num_steps.value,
-                    guidance_scale.value,
-                    seed.value,
-                ],
-                [
-                    "fighter",
-                    "examples/fighter/frame_0001.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "examples/fighter/uv_normal.png", # /dgxusers/Users/jyang/project/ObjectReal/data/control/preprocess/monkey/uv_normal/fused.png
-                    "wood",
-                    a_prompt.value,
-                    n_prompt.value,
-                    num_samples.value,
-                    image_resolution.value,
-                    num_steps.value,
-                    guidance_scale.value,
-                    seed.value,
-                ],
-            ],
-        )
-
-    return demo
-
-
-if __name__ == "__main__":
-    from model import Model
-
-    model = Model(task_name="Texnet")
-    demo = create_demo(model.process_texnet)
-    demo.queue().launch()

cv_utils.py
DELETED
@@ -1,17 +0,0 @@
-import cv2
-import numpy as np
-
-
-def resize_image(input_image, resolution, interpolation=None):
-    H, W, C = input_image.shape
-    H = float(H)
-    W = float(W)
-    k = float(resolution) / max(H, W)
-    H *= k
-    W *= k
-    H = int(np.round(H / 64.0)) * 64
-    W = int(np.round(W / 64.0)) * 64
-    if interpolation is None:
-        interpolation = cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA
-    img = cv2.resize(input_image, (W, H), interpolation=interpolation)
-    return img

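The deleted resize_image helper scales the longer side toward the requested resolution and then rounds both dimensions to multiples of 64 (a stride commonly expected by the SD-family models used elsewhere in this repo). A quick usage sketch, assuming the helper above is still importable as cv_utils.resize_image:

import numpy as np
from cv_utils import resize_image  # the deleted helper shown above

img = np.zeros((480, 720, 3), dtype=np.uint8)   # H=480, W=720
out = resize_image(img, resolution=512)
print(out.shape)  # (320, 512, 3): longer side scaled to 512, both sides rounded to multiples of 64
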
depth_estimator.py
DELETED
@@ -1,25 +0,0 @@
-import numpy as np
-import PIL.Image
-from controlnet_aux.util import HWC3
-from transformers import pipeline
-
-from cv_utils import resize_image
-
-
-class DepthEstimator:
-    def __init__(self):
-        self.model = pipeline("depth-estimation")
-
-    def __call__(self, image: np.ndarray, **kwargs) -> PIL.Image.Image:
-        detect_resolution = kwargs.pop("detect_resolution", 512)
-        image_resolution = kwargs.pop("image_resolution", 512)
-        image = np.array(image)
-        image = HWC3(image)
-        image = resize_image(image, resolution=detect_resolution)
-        image = PIL.Image.fromarray(image)
-        image = self.model(image)
-        image = image["depth"]
-        image = np.array(image)
-        image = HWC3(image)
-        image = resize_image(image, resolution=image_resolution)
-        return PIL.Image.fromarray(image)

examples/bunny/frame_0001.png
DELETED (Git LFS object)

examples/bunny/mesh.obj
DELETED (diff too large to render)

examples/bunny/uv_normal.png
DELETED (Git LFS object)

examples/fighter/frame_0001.png
DELETED (Git LFS object)

examples/fighter/mesh.obj
DELETED (diff too large to render)

examples/fighter/uv_normal.png
DELETED (Git LFS object)

examples/highheel/frame_0001.png
DELETED (Git LFS object)

examples/highheel/mesh.obj
DELETED (diff too large to render)

examples/highheel/uv_normal.png
DELETED (Git LFS object)

examples/monkey/frame_0001.png
DELETED (Git LFS object)

examples/monkey/mesh.obj
DELETED (diff too large to render)

examples/monkey/uv_normal.png
DELETED (Git LFS object)

examples/tank/frame_0001.png
DELETED (Git LFS object)

examples/tank/mesh.obj
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:301633de1a7757f78a6f67abb6e61bcc8e6a01f5a54a8582d1943ad0ad943211
-size 6942253

examples/tank/uv_normal.png
DELETED (Git LFS object)

examples/tshirt/frame_0001.png
DELETED (Git LFS object)

examples/tshirt/mesh.obj
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b7c6c9bdec8d646a1980e5b987a1182c92af84cc945ef49c1735d4337185d3e5
-size 39275876

examples/tshirt/uv_normal.png
DELETED (Git LFS object)

image_segmentor.py
DELETED
@@ -1,33 +0,0 @@
-import cv2
-import numpy as np
-import PIL.Image
-import torch
-from controlnet_aux.util import HWC3, ade_palette
-from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
-
-from cv_utils import resize_image
-
-
-class ImageSegmentor:
-    def __init__(self):
-        self.image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-small")
-        self.image_segmentor = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-small")
-
-    @torch.inference_mode()
-    def __call__(self, image: np.ndarray, **kwargs) -> PIL.Image.Image:
-        detect_resolution = kwargs.pop("detect_resolution", 512)
-        image_resolution = kwargs.pop("image_resolution", 512)
-        image = HWC3(image)
-        image = resize_image(image, resolution=detect_resolution)
-        image = PIL.Image.fromarray(image)
-
-        pixel_values = self.image_processor(image, return_tensors="pt").pixel_values
-        outputs = self.image_segmentor(pixel_values)
-        seg = self.image_processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
-        color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8)
-        for label, color in enumerate(ade_palette()):
-            color_seg[seg == label, :] = color
-        color_seg = color_seg.astype(np.uint8)
-
-        color_seg = resize_image(color_seg, resolution=image_resolution, interpolation=cv2.INTER_NEAREST)
-        return PIL.Image.fromarray(color_seg)

install.sh
DELETED
@@ -1,18 +0,0 @@
-#!/bin/bash
-eval "$(conda shell.bash hook)"
-# conda activate base
-# conda remove -n matgen-plus --all
-
-conda create -n matgen-plus python=3.11
-conda activate matgen-plus
-
-pip install diffusers["torch"] transformers accelerate xformers
-pip install gradio
-pip install controlnet-aux
-
-# text2tex
-conda install pytorch3d -c pytorch -c conda-forge
-conda install -c conda-forge open-clip-torch pytorch-lightning
-pip install trimesh xatlas scikit-learn opencv-python omegaconf
-
-python app.py

model.py
DELETED
@@ -1,959 +0,0 @@
-import gc
-
-# get socket and check if the name is vgldgx01
-import socket
-if socket.gethostname() != "vgldgx01":
-    import spaces #[uncomment to use ZeroGPU]
-
-import numpy as np
-import PIL.Image
-import torch
-from controlnet_aux.util import HWC3
-from diffusers import (
-    ControlNetModel,
-    DiffusionPipeline,
-    StableDiffusionControlNetPipeline,
-    StableDiffusionImg2ImgPipeline,
-    UniPCMultistepScheduler,
-    DDIMScheduler, #rgb2x
-)
-import torchvision
-from torchvision import transforms
-from cv_utils import resize_image
-from preprocessor import Preprocessor
-from settings import MAX_IMAGE_RESOLUTION, MAX_NUM_IMAGES
-from tqdm.auto import tqdm
-import subprocess
-
-from rgb2x.pipeline_rgb2x import StableDiffusionAOVMatEstPipeline
-from app_texnet import image_to_temp_path
-import os
-import time
-import tempfile
-from text2tex.scripts.generate_texture import text2tex_call, init_args
-from glob import glob
-
-CONTROLNET_MODEL_IDS = {
-    # "Openpose": "lllyasviel/control_v11p_sd15_openpose",
-    # "Canny": "lllyasviel/control_v11p_sd15_canny",
-    # "MLSD": "lllyasviel/control_v11p_sd15_mlsd",
-    # "scribble": "lllyasviel/control_v11p_sd15_scribble",
-    # "softedge": "lllyasviel/control_v11p_sd15_softedge",
-    # "segmentation": "lllyasviel/control_v11p_sd15_seg",
-    # "depth": "lllyasviel/control_v11f1p_sd15_depth",
-    # "NormalBae": "lllyasviel/control_v11p_sd15_normalbae",
-    # "lineart": "lllyasviel/control_v11p_sd15_lineart",
-    # "lineart_anime": "lllyasviel/control_v11p_sd15s2_lineart_anime",
-    # "shuffle": "lllyasviel/control_v11e_sd15_shuffle",
-    # "ip2p": "lllyasviel/control_v11e_sd15_ip2p",
-    # "inpaint": "lllyasviel/control_v11e_sd15_inpaint",
-    # "texnet": "/home/jyang/projects/ObjectReal/logs/train_texnet_deploy/checkpoint-55000/controlnet" # load and call
-    "texnet": "jingyangcarl/texnet",
-}
-
-
-def download_all_controlnet_weights() -> None:
-    for model_id in CONTROLNET_MODEL_IDS.values():
-        ControlNetModel.from_pretrained(model_id)
-
-
-class Model:
-    def __init__(
-        self, base_model_id: str = "stable-diffusion-v1-5/stable-diffusion-v1-5", task_name: str = "Canny"
-    ) -> None:
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.base_model_id = ""
-        self.task_name = ""
-        self.pipe = self.load_pipe(base_model_id, task_name)
-        self.pipe_base = StableDiffusionImg2ImgPipeline.from_pretrained(
-            'runwayml/stable-diffusion-v1-5', safety_checker=None, torch_dtype=torch.float16
-        ).to(self.device)
-        self.preprocessor = Preprocessor()
-
-        # set up pipe_rgb2x
-        self.pipe_rgb2x = StableDiffusionAOVMatEstPipeline.from_pretrained(
-            "zheng95z/rgb-to-x",
-            torch_dtype=torch.float16,
-        ).to(self.device)
-        self.pipe_rgb2x.scheduler = DDIMScheduler.from_config(
-            self.pipe_rgb2x.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
-        )
-        self.pipe_rgb2x.set_progress_bar_config(disable=True)
-
-        # setup blender
-        self.blender_path = '/tmp/blender-3.2.2-linux-x64/blender'
-        if not os.path.exists(self.blender_path):
-            print("Downloading Blender...")
-            subprocess.run(["wget", "https://download.blender.org/release/Blender3.2/blender-3.2.2-linux-x64.tar.xz", "-O", "/tmp/blender-3.2.2-linux-x64.tar.xz"], check=True)
-            subprocess.run(["tar", "-xf", "/tmp/blender-3.2.2-linux-x64.tar.xz", "-C", "/tmp"], check=True)
-            print("Blender downloaded and extracted.")
-
-    def load_pipe(self, base_model_id: str, task_name: str) -> DiffusionPipeline:
-        if (
-            base_model_id == self.base_model_id
-            and task_name == self.task_name
-            and hasattr(self, "pipe")
-            and self.pipe is not None
-        ):
-            return self.pipe
-        model_id = CONTROLNET_MODEL_IDS[task_name]
-        controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
-        to_upload = False
-        if to_upload:
-            # confirm before uploading
-            confirm = input(f"Do you want to upload {model_id} to the hub? (y/n): ")
-            if confirm.lower() == "y":
-                controlnet.push_to_hub("jingyangcarl/texnet")
-            else:
-                print("Upload cancelled.")
-        pipe = StableDiffusionControlNetPipeline.from_pretrained(
-            base_model_id, safety_checker=None, controlnet=controlnet, torch_dtype=torch.float16
-        )
-        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
-        pipe.to(self.device)
-        if self.device.type == "cuda":
-            import os
-            if os.environ.get("SPACES_ZERO_GPU", "0") == "1":
-                # when running on ZeroGPU, enable CPU offload
-                # pipe.enable_xformers_memory_efficient_attention() doens't work
-                # pipe.enable_model_cpu_offload()
-                pass
-            else:
-                pipe.enable_xformers_memory_efficient_attention()
-        torch.cuda.empty_cache()
-        gc.collect()
-        self.base_model_id = base_model_id
-        self.task_name = task_name
-        return pipe
-
-    def set_base_model(self, base_model_id: str) -> str:
-        if not base_model_id or base_model_id == self.base_model_id:
-            return self.base_model_id
-        del self.pipe
-        torch.cuda.empty_cache()
-        gc.collect()
-        try:
-            self.pipe = self.load_pipe(base_model_id, self.task_name)
-        except Exception:  # noqa: BLE001
-            self.pipe = self.load_pipe(self.base_model_id, self.task_name)
-        return self.base_model_id
-
-    def load_controlnet_weight(self, task_name: str) -> None:
-        if task_name == self.task_name:
-            return
-        if self.pipe is not None and hasattr(self.pipe, "controlnet"):
-            del self.pipe.controlnet
-        torch.cuda.empty_cache()
-        gc.collect()
-        model_id = CONTROLNET_MODEL_IDS[task_name]
-        controlnet = ControlNetModel.from_pretrained(model_id, torch_dtype=torch.float16)
-        controlnet.to(self.device)
-        torch.cuda.empty_cache()
-        gc.collect()
-        self.pipe.controlnet = controlnet
-        self.task_name = task_name
-
-    def get_prompt(self, prompt: str, additional_prompt: str) -> str:
-        return additional_prompt if not prompt else f"{prompt}, {additional_prompt}"
-
-    # @spaces.GPU #[uncomment to use ZeroGPU]
-    @torch.autocast("cuda")
-    def run_pipe(
-        self,
-        prompt: str,
-        negative_prompt: str,
-        control_image: PIL.Image.Image,
-        num_images: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        generator = torch.Generator().manual_seed(seed)
-        # self.pipe.to(self.device)
-        return self.pipe(
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            guidance_scale=guidance_scale,
-            num_images_per_prompt=num_images,
-            num_inference_steps=num_steps,
-            generator=generator,
-            image=control_image,
-        ).images
-
-    # @spaces.GPU #[uncomment to use ZeroGPU]
-    @torch.inference_mode()
-    def process_texnet(
-        self,
-        obj_name: str,
-        represented_image: np.ndarray | None, # not used
-        image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        low_threshold: int,
-        high_threshold: int,
-    ) -> list[PIL.Image.Image]:
-        if image is None:
-            raise ValueError
-        if image_resolution > MAX_IMAGE_RESOLUTION:
-            raise ValueError
-        if num_images > MAX_NUM_IMAGES:
-            raise ValueError
-
-        prompt_nospace = prompt.replace(' ', '_')
-
-        # self.preprocessor.load("texnet")
-        # control_image = self.preprocessor(
-        #     image=image, low_threshold=low_threshold, high_threshold=high_threshold, image_resolution=image_resolution, output_type="pil"
-        # )
-
-        # self.load_controlnet_weight("texnet")
-        # tex_coarse = self.run_pipe(
-        #     prompt=self.get_prompt(prompt, additional_prompt),
-        #     negative_prompt=negative_prompt,
-        #     control_image=control_image,
-        #     num_images=num_images,
-        #     num_steps=num_steps,
-        #     guidance_scale=guidance_scale,
-        #     seed=seed,
-        # )
-
-        # # use img2img pipeline
-        # self.pipe_backup = self.pipe
-        # self.pipe = self.pipe_base
-
-        # # refine
-        tex_fine = []
-        mesh_fine = []
-        # for result_coarse in tex_coarse:
-        #     # clean up GPU cache
-        #     torch.cuda.empty_cache()
-        #     gc.collect()
-
-        #     # masking
-        #     mask = (np.array(control_image).sum(axis=-1) == 0)[...,None]
-        #     image_masked = PIL.Image.fromarray(np.where(mask, control_image, result_coarse))
-        #     image_blurry = transforms.GaussianBlur(kernel_size=5, sigma=1)(image_masked)
-        #     result_fine = self.run_pipe(
-        #         # prompt=prompt,
-        #         prompt=self.get_prompt(prompt, additional_prompt),
-        #         negative_prompt=negative_prompt,
-        #         control_image=image_blurry,
-        #         num_images=1,
-        #         num_steps=num_steps,
-        #         guidance_scale=guidance_scale,
-        #         seed=seed,
-        #     )[0]
-        #     result_fine = PIL.Image.fromarray(np.where(mask, control_image, result_fine))
-        #     tex_fine.append(result_fine)
-
-        temp_out_path = tempfile.mkdtemp()
-        temp_out_path = 'output'
-
-        # put text2tex here,
-        args = init_args()
-        args.input_dir = f'examples/{obj_name}/'
-        args.output_dir = os.path.join(temp_out_path, f'{obj_name}/{prompt_nospace}')
-        args.obj_name = obj_name
-        args.obj_file = 'mesh.obj'
-        args.prompt = f'{prompt} {obj_name}'
-        args.add_view_to_prompt = True
-        args.ddim_steps = 5
-        # args.ddim_steps = 50
-        args.new_strength = 1.0
-        args.update_strength = 0.3
-        args.view_threshold = 0.1
-        args.blend = 0
-        args.dist = 1
-        args.num_viewpoints = 2
-        # args.num_viewpoints = 36
-        args.viewpoint_mode = 'predefined'
-        args.use_principle = True
-        args.update_steps = 2
-        # args.update_steps = 20
-        args.update_mode = 'heuristic'
-        args.seed = 42
-        args.post_process = True
-        args.device = '2080'
-        args.uv_size = 1000
-        args.image_size = 512
-        # args.image_size = 768
-        args.use_objaverse = True # assume the mesh is normalized with y-axis as up
-        output_dir = text2tex_call(args)
-
-        # get the texture and mesh with underscore '_post', which is the id of the last mesh, should be good for the visual
-        post_idx = glob(os.path.join(output_dir, 'update', 'mesh', "*_post.png"))[0].split('/')[-1].split('_')[0]
-
-        tex_fine.append(PIL.Image.open(os.path.join(output_dir, 'update', 'mesh', f"{post_idx}.png")).convert("RGB"))
-        mesh_fine.append(os.path.join(output_dir, 'update', 'mesh', f"{post_idx}.obj"))
-        torch.cuda.empty_cache()
-
-        # restore the original pipe
-        # self.pipe = self.pipe_backup
-
-        # use rgb2x for now for generating the texture
-        def rgb2x(
-            pipeline,
-            photo,
-            inference_step = 50,
-            num_samples = 1,
-        ):
-            generator = torch.Generator(device="cuda").manual_seed(seed)
-
-            # Check if the width and height are multiples of 8. If not, crop it using torchvision.transforms.CenterCrop
-            old_height = photo.shape[1]
-            old_width = photo.shape[2]
-            new_height = old_height
-            new_width = old_width
-            radio = old_height / old_width
-            max_side = 1000
-            if old_height > old_width:
-                new_height = max_side
-                new_width = int(new_height / radio)
-            else:
-                new_width = max_side
-                new_height = int(new_width * radio)
-
-            if new_width % 8 != 0 or new_height % 8 != 0:
-                new_width = new_width // 8 * 8
-                new_height = new_height // 8 * 8
-
-            photo = torchvision.transforms.Resize((new_height, new_width))(photo)
-
-            required_aovs = ["albedo", "normal", "roughness", "metallic", "irradiance"]
-            prompts = {
-                "albedo": "Albedo (diffuse basecolor)",
-                "normal": "Camera-space Normal",
-                "roughness": "Roughness",
-                "metallic": "Metallicness",
-                "irradiance": "Irradiance (diffuse lighting)",
-            }
-
-            return_list = []
-            for i in tqdm(range(num_samples), desc="Running Pipeline", leave=False):
-                for aov_name in required_aovs:
-                    prompt = prompts[aov_name]
-                    generated_image = pipeline(
-                        prompt=prompt,
-                        photo=photo,
-                        num_inference_steps=inference_step,
-                        height=new_height,
-                        width=new_width,
-                        generator=generator,
-                        required_aovs=[aov_name],
-                    ).images[0][0]
-
-                    generated_image = torchvision.transforms.Resize(
-                        (old_height, old_width)
-                    )(generated_image)
-
-                    # generated_image = (generated_image, f"Generated {aov_name} {i}")
-                    # generated_image = (generated_image, f"{aov_name}")
-                    return_list.append(generated_image)
-
-            return photo, return_list, prompts
-
-        # Load rgb2x pipeline
-        _, preds, prompts = rgb2x(self.pipe_rgb2x, torchvision.transforms.PILToTensor()(tex_fine[0]).to(self.pipe.device), inference_step=num_steps, num_samples=num_images)
-
-        intrinsic_dir = os.path.join(output_dir, 'intrinsic')
-        use_text2tex = True
-        if use_text2tex:
-            base_color_path = image_to_temp_path(tex_fine[0], "base_color", out_dir=intrinsic_dir)
-            normal_map_path = image_to_temp_path(preds[0], "normal_map", out_dir=intrinsic_dir)
-            roughness_path = image_to_temp_path(preds[1], "roughness", out_dir=intrinsic_dir)
-            metallic_path = image_to_temp_path(preds[2], "metallic", out_dir=intrinsic_dir)
-        else:
-            base_color_path = image_to_temp_path(tex_fine[0].rotate(90), "base_color", out_dir=intrinsic_dir)
-            normal_map_path = image_to_temp_path(preds[0].rotate(90), "normal_map", out_dir=intrinsic_dir)
-            roughness_path = image_to_temp_path(preds[1].rotate(90), "roughness", out_dir=intrinsic_dir)
-            metallic_path = image_to_temp_path(preds[2].rotate(90), "metallic", out_dir=intrinsic_dir)
-        current_timecode = time.strftime("%Y%m%d_%H%M%S")
-        # output_blend_path = os.path.join(os.getcwd(), "output", f"{obj_name}_{prompt_nospace}_{current_timecode}.blend") # replace with desired output path
-        output_blend_path = os.path.join(tempfile.mkdtemp(), f"{obj_name}_{prompt_nospace}_{current_timecode}.blend") # replace with desired output path
-        os.makedirs(os.path.dirname(output_blend_path), exist_ok=True)
-
-        def run_blend_generation(
-            blender_path,
-            generate_script_path,
-            obj_path,
-            base_color_path,
-            normal_map_path,
-            roughness_path,
-            metallic_path,
-            output_blend
-        ):
-            cmd = [
-                blender_path, "--background", "--python", generate_script_path, "--",
-                obj_path, base_color_path, normal_map_path, roughness_path, metallic_path, output_blend
-            ]
-            subprocess.run(cmd, check=True)
-
-        # check if the blender_path exists, if not download
-        run_blend_generation(
-            blender_path=self.blender_path,
-            generate_script_path="rgb2x/generate_blend.py",
-            # obj_path=f"examples/{obj_name}/mesh.obj", # replace with actual mesh path
-            obj_path=mesh_fine[0], # replace with actual mesh path
-            base_color_path=base_color_path,
-            normal_map_path=normal_map_path,
-            roughness_path=roughness_path,
-            metallic_path=metallic_path,
-            output_blend=output_blend_path # replace with desired output path
-        )
-
-        # gallary
-        return [*tex_fine], [preds[1]], [preds[2]], [preds[3]], [output_blend_path]
-
-    # @spaces.GPU #[uncomment to use ZeroGPU]
-    @torch.inference_mode()
-    def process_canny(
-        self,
-        image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        low_threshold: int,
-        high_threshold: int,
-    ) -> list[PIL.Image.Image]:
-        if image is None:
-            raise ValueError
-        if image_resolution > MAX_IMAGE_RESOLUTION:
-            raise ValueError
-        if num_images > MAX_NUM_IMAGES:
-            raise ValueError
-
-        self.preprocessor.load("Canny")
-        control_image = self.preprocessor(
-            image=image, low_threshold=low_threshold, high_threshold=high_threshold, detect_resolution=image_resolution
-        )
-
-        self.load_controlnet_weight("Canny")
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [control_image, *results]
-
-    @torch.inference_mode()
-    def process_mlsd(
-        self,
-        image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        preprocess_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        value_threshold: float,
-        distance_threshold: float,
-    ) -> list[PIL.Image.Image]:
-        if image is None:
-            raise ValueError
-        if image_resolution > MAX_IMAGE_RESOLUTION:
-            raise ValueError
-        if num_images > MAX_NUM_IMAGES:
-            raise ValueError
-
-        self.preprocessor.load("MLSD")
-        control_image = self.preprocessor(
-            image=image,
-            image_resolution=image_resolution,
-            detect_resolution=preprocess_resolution,
-            thr_v=value_threshold,
-            thr_d=distance_threshold,
-        )
-        self.load_controlnet_weight("MLSD")
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [control_image, *results]
-
-    @torch.inference_mode()
-    def process_scribble(
-        self,
-        image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        preprocess_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        preprocessor_name: str,
-    ) -> list[PIL.Image.Image]:
-        if image is None:
-            raise ValueError
-        if image_resolution > MAX_IMAGE_RESOLUTION:
-            raise ValueError
-        if num_images > MAX_NUM_IMAGES:
-            raise ValueError
-
-        if preprocessor_name == "None":
-            image = HWC3(image)
-            image = resize_image(image, resolution=image_resolution)
-            control_image = PIL.Image.fromarray(image)
-        elif preprocessor_name == "HED":
-            self.preprocessor.load(preprocessor_name)
-            control_image = self.preprocessor(
-                image=image,
-                image_resolution=image_resolution,
-                detect_resolution=preprocess_resolution,
-                scribble=False,
-            )
-        elif preprocessor_name == "PidiNet":
-            self.preprocessor.load(preprocessor_name)
-            control_image = self.preprocessor(
-                image=image,
-                image_resolution=image_resolution,
-                detect_resolution=preprocess_resolution,
-                safe=False,
-            )
-        self.load_controlnet_weight("scribble")
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [control_image, *results]
-
-    @torch.inference_mode()
-    def process_scribble_interactive(
-        self,
-        image_and_mask: dict[str, np.ndarray | list[np.ndarray]] | None,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-    ) -> list[PIL.Image.Image]:
-        if image_and_mask is None:
-            raise ValueError
-        if image_resolution > MAX_IMAGE_RESOLUTION:
-            raise ValueError
-        if num_images > MAX_NUM_IMAGES:
-            raise ValueError
-
-        image = 255 - image_and_mask["composite"] # type: ignore
-        image = HWC3(image)
-        image = resize_image(image, resolution=image_resolution)
-        control_image = PIL.Image.fromarray(image)
-
-        self.load_controlnet_weight("scribble")
-        results = self.run_pipe(
-            prompt=self.get_prompt(prompt, additional_prompt),
-            negative_prompt=negative_prompt,
-            control_image=control_image,
-            num_images=num_images,
-            num_steps=num_steps,
-            guidance_scale=guidance_scale,
-            seed=seed,
-        )
-        return [control_image, *results]
-
-    @torch.inference_mode()
-    def process_softedge(
-        self,
-        image: np.ndarray,
-        prompt: str,
-        additional_prompt: str,
-        negative_prompt: str,
-        num_images: int,
-        image_resolution: int,
-        preprocess_resolution: int,
-        num_steps: int,
-        guidance_scale: float,
-        seed: int,
-        preprocessor_name: str,
-    ) -> list[PIL.Image.Image]:
-        if image is None:
-            raise ValueError
|
604 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
605 |
-
raise ValueError
|
606 |
-
if num_images > MAX_NUM_IMAGES:
|
607 |
-
raise ValueError
|
608 |
-
|
609 |
-
if preprocessor_name == "None":
|
610 |
-
image = HWC3(image)
|
611 |
-
image = resize_image(image, resolution=image_resolution)
|
612 |
-
control_image = PIL.Image.fromarray(image)
|
613 |
-
elif preprocessor_name in ["HED", "HED safe"]:
|
614 |
-
safe = "safe" in preprocessor_name
|
615 |
-
self.preprocessor.load("HED")
|
616 |
-
control_image = self.preprocessor(
|
617 |
-
image=image,
|
618 |
-
image_resolution=image_resolution,
|
619 |
-
detect_resolution=preprocess_resolution,
|
620 |
-
scribble=safe,
|
621 |
-
)
|
622 |
-
elif preprocessor_name in ["PidiNet", "PidiNet safe"]:
|
623 |
-
safe = "safe" in preprocessor_name
|
624 |
-
self.preprocessor.load("PidiNet")
|
625 |
-
control_image = self.preprocessor(
|
626 |
-
image=image,
|
627 |
-
image_resolution=image_resolution,
|
628 |
-
detect_resolution=preprocess_resolution,
|
629 |
-
safe=safe,
|
630 |
-
)
|
631 |
-
else:
|
632 |
-
raise ValueError
|
633 |
-
self.load_controlnet_weight("softedge")
|
634 |
-
results = self.run_pipe(
|
635 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
636 |
-
negative_prompt=negative_prompt,
|
637 |
-
control_image=control_image,
|
638 |
-
num_images=num_images,
|
639 |
-
num_steps=num_steps,
|
640 |
-
guidance_scale=guidance_scale,
|
641 |
-
seed=seed,
|
642 |
-
)
|
643 |
-
return [control_image, *results]
|
644 |
-
|
645 |
-
@torch.inference_mode()
|
646 |
-
def process_openpose(
|
647 |
-
self,
|
648 |
-
image: np.ndarray,
|
649 |
-
prompt: str,
|
650 |
-
additional_prompt: str,
|
651 |
-
negative_prompt: str,
|
652 |
-
num_images: int,
|
653 |
-
image_resolution: int,
|
654 |
-
preprocess_resolution: int,
|
655 |
-
num_steps: int,
|
656 |
-
guidance_scale: float,
|
657 |
-
seed: int,
|
658 |
-
preprocessor_name: str,
|
659 |
-
) -> list[PIL.Image.Image]:
|
660 |
-
if image is None:
|
661 |
-
raise ValueError
|
662 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
663 |
-
raise ValueError
|
664 |
-
if num_images > MAX_NUM_IMAGES:
|
665 |
-
raise ValueError
|
666 |
-
|
667 |
-
if preprocessor_name == "None":
|
668 |
-
image = HWC3(image)
|
669 |
-
image = resize_image(image, resolution=image_resolution)
|
670 |
-
control_image = PIL.Image.fromarray(image)
|
671 |
-
else:
|
672 |
-
self.preprocessor.load("Openpose")
|
673 |
-
control_image = self.preprocessor(
|
674 |
-
image=image,
|
675 |
-
image_resolution=image_resolution,
|
676 |
-
detect_resolution=preprocess_resolution,
|
677 |
-
hand_and_face=True,
|
678 |
-
)
|
679 |
-
self.load_controlnet_weight("Openpose")
|
680 |
-
results = self.run_pipe(
|
681 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
682 |
-
negative_prompt=negative_prompt,
|
683 |
-
control_image=control_image,
|
684 |
-
num_images=num_images,
|
685 |
-
num_steps=num_steps,
|
686 |
-
guidance_scale=guidance_scale,
|
687 |
-
seed=seed,
|
688 |
-
)
|
689 |
-
return [control_image, *results]
|
690 |
-
|
691 |
-
@torch.inference_mode()
|
692 |
-
def process_segmentation(
|
693 |
-
self,
|
694 |
-
image: np.ndarray,
|
695 |
-
prompt: str,
|
696 |
-
additional_prompt: str,
|
697 |
-
negative_prompt: str,
|
698 |
-
num_images: int,
|
699 |
-
image_resolution: int,
|
700 |
-
preprocess_resolution: int,
|
701 |
-
num_steps: int,
|
702 |
-
guidance_scale: float,
|
703 |
-
seed: int,
|
704 |
-
preprocessor_name: str,
|
705 |
-
) -> list[PIL.Image.Image]:
|
706 |
-
if image is None:
|
707 |
-
raise ValueError
|
708 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
709 |
-
raise ValueError
|
710 |
-
if num_images > MAX_NUM_IMAGES:
|
711 |
-
raise ValueError
|
712 |
-
|
713 |
-
if preprocessor_name == "None":
|
714 |
-
image = HWC3(image)
|
715 |
-
image = resize_image(image, resolution=image_resolution)
|
716 |
-
control_image = PIL.Image.fromarray(image)
|
717 |
-
else:
|
718 |
-
self.preprocessor.load(preprocessor_name)
|
719 |
-
control_image = self.preprocessor(
|
720 |
-
image=image,
|
721 |
-
image_resolution=image_resolution,
|
722 |
-
detect_resolution=preprocess_resolution,
|
723 |
-
)
|
724 |
-
self.load_controlnet_weight("segmentation")
|
725 |
-
results = self.run_pipe(
|
726 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
727 |
-
negative_prompt=negative_prompt,
|
728 |
-
control_image=control_image,
|
729 |
-
num_images=num_images,
|
730 |
-
num_steps=num_steps,
|
731 |
-
guidance_scale=guidance_scale,
|
732 |
-
seed=seed,
|
733 |
-
)
|
734 |
-
return [control_image, *results]
|
735 |
-
|
736 |
-
@torch.inference_mode()
|
737 |
-
def process_depth(
|
738 |
-
self,
|
739 |
-
image: np.ndarray,
|
740 |
-
prompt: str,
|
741 |
-
additional_prompt: str,
|
742 |
-
negative_prompt: str,
|
743 |
-
num_images: int,
|
744 |
-
image_resolution: int,
|
745 |
-
preprocess_resolution: int,
|
746 |
-
num_steps: int,
|
747 |
-
guidance_scale: float,
|
748 |
-
seed: int,
|
749 |
-
preprocessor_name: str,
|
750 |
-
) -> list[PIL.Image.Image]:
|
751 |
-
if image is None:
|
752 |
-
raise ValueError
|
753 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
754 |
-
raise ValueError
|
755 |
-
if num_images > MAX_NUM_IMAGES:
|
756 |
-
raise ValueError
|
757 |
-
|
758 |
-
if preprocessor_name == "None":
|
759 |
-
image = HWC3(image)
|
760 |
-
image = resize_image(image, resolution=image_resolution)
|
761 |
-
control_image = PIL.Image.fromarray(image)
|
762 |
-
else:
|
763 |
-
self.preprocessor.load(preprocessor_name)
|
764 |
-
control_image = self.preprocessor(
|
765 |
-
image=image,
|
766 |
-
image_resolution=image_resolution,
|
767 |
-
detect_resolution=preprocess_resolution,
|
768 |
-
)
|
769 |
-
self.load_controlnet_weight("depth")
|
770 |
-
results = self.run_pipe(
|
771 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
772 |
-
negative_prompt=negative_prompt,
|
773 |
-
control_image=control_image,
|
774 |
-
num_images=num_images,
|
775 |
-
num_steps=num_steps,
|
776 |
-
guidance_scale=guidance_scale,
|
777 |
-
seed=seed,
|
778 |
-
)
|
779 |
-
return [control_image, *results]
|
780 |
-
|
781 |
-
@torch.inference_mode()
|
782 |
-
def process_normal(
|
783 |
-
self,
|
784 |
-
image: np.ndarray,
|
785 |
-
prompt: str,
|
786 |
-
additional_prompt: str,
|
787 |
-
negative_prompt: str,
|
788 |
-
num_images: int,
|
789 |
-
image_resolution: int,
|
790 |
-
preprocess_resolution: int,
|
791 |
-
num_steps: int,
|
792 |
-
guidance_scale: float,
|
793 |
-
seed: int,
|
794 |
-
preprocessor_name: str,
|
795 |
-
) -> list[PIL.Image.Image]:
|
796 |
-
if image is None:
|
797 |
-
raise ValueError
|
798 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
799 |
-
raise ValueError
|
800 |
-
if num_images > MAX_NUM_IMAGES:
|
801 |
-
raise ValueError
|
802 |
-
|
803 |
-
if preprocessor_name == "None":
|
804 |
-
image = HWC3(image)
|
805 |
-
image = resize_image(image, resolution=image_resolution)
|
806 |
-
control_image = PIL.Image.fromarray(image)
|
807 |
-
else:
|
808 |
-
self.preprocessor.load("NormalBae")
|
809 |
-
control_image = self.preprocessor(
|
810 |
-
image=image,
|
811 |
-
image_resolution=image_resolution,
|
812 |
-
detect_resolution=preprocess_resolution,
|
813 |
-
)
|
814 |
-
self.load_controlnet_weight("NormalBae")
|
815 |
-
results = self.run_pipe(
|
816 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
817 |
-
negative_prompt=negative_prompt,
|
818 |
-
control_image=control_image,
|
819 |
-
num_images=num_images,
|
820 |
-
num_steps=num_steps,
|
821 |
-
guidance_scale=guidance_scale,
|
822 |
-
seed=seed,
|
823 |
-
)
|
824 |
-
return [control_image, *results]
|
825 |
-
|
826 |
-
@torch.inference_mode()
|
827 |
-
def process_lineart(
|
828 |
-
self,
|
829 |
-
image: np.ndarray,
|
830 |
-
prompt: str,
|
831 |
-
additional_prompt: str,
|
832 |
-
negative_prompt: str,
|
833 |
-
num_images: int,
|
834 |
-
image_resolution: int,
|
835 |
-
preprocess_resolution: int,
|
836 |
-
num_steps: int,
|
837 |
-
guidance_scale: float,
|
838 |
-
seed: int,
|
839 |
-
preprocessor_name: str,
|
840 |
-
) -> list[PIL.Image.Image]:
|
841 |
-
if image is None:
|
842 |
-
raise ValueError
|
843 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
844 |
-
raise ValueError
|
845 |
-
if num_images > MAX_NUM_IMAGES:
|
846 |
-
raise ValueError
|
847 |
-
|
848 |
-
if preprocessor_name in ["None", "None (anime)"]:
|
849 |
-
image = HWC3(image)
|
850 |
-
image = resize_image(image, resolution=image_resolution)
|
851 |
-
control_image = PIL.Image.fromarray(image)
|
852 |
-
elif preprocessor_name in ["Lineart", "Lineart coarse"]:
|
853 |
-
coarse = "coarse" in preprocessor_name
|
854 |
-
self.preprocessor.load("Lineart")
|
855 |
-
control_image = self.preprocessor(
|
856 |
-
image=image,
|
857 |
-
image_resolution=image_resolution,
|
858 |
-
detect_resolution=preprocess_resolution,
|
859 |
-
coarse=coarse,
|
860 |
-
)
|
861 |
-
elif preprocessor_name == "Lineart (anime)":
|
862 |
-
self.preprocessor.load("LineartAnime")
|
863 |
-
control_image = self.preprocessor(
|
864 |
-
image=image,
|
865 |
-
image_resolution=image_resolution,
|
866 |
-
detect_resolution=preprocess_resolution,
|
867 |
-
)
|
868 |
-
if "anime" in preprocessor_name:
|
869 |
-
self.load_controlnet_weight("lineart_anime")
|
870 |
-
else:
|
871 |
-
self.load_controlnet_weight("lineart")
|
872 |
-
results = self.run_pipe(
|
873 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
874 |
-
negative_prompt=negative_prompt,
|
875 |
-
control_image=control_image,
|
876 |
-
num_images=num_images,
|
877 |
-
num_steps=num_steps,
|
878 |
-
guidance_scale=guidance_scale,
|
879 |
-
seed=seed,
|
880 |
-
)
|
881 |
-
return [control_image, *results]
|
882 |
-
|
883 |
-
@torch.inference_mode()
|
884 |
-
def process_shuffle(
|
885 |
-
self,
|
886 |
-
image: np.ndarray,
|
887 |
-
prompt: str,
|
888 |
-
additional_prompt: str,
|
889 |
-
negative_prompt: str,
|
890 |
-
num_images: int,
|
891 |
-
image_resolution: int,
|
892 |
-
num_steps: int,
|
893 |
-
guidance_scale: float,
|
894 |
-
seed: int,
|
895 |
-
preprocessor_name: str,
|
896 |
-
) -> list[PIL.Image.Image]:
|
897 |
-
if image is None:
|
898 |
-
raise ValueError
|
899 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
900 |
-
raise ValueError
|
901 |
-
if num_images > MAX_NUM_IMAGES:
|
902 |
-
raise ValueError
|
903 |
-
|
904 |
-
if preprocessor_name == "None":
|
905 |
-
image = HWC3(image)
|
906 |
-
image = resize_image(image, resolution=image_resolution)
|
907 |
-
control_image = PIL.Image.fromarray(image)
|
908 |
-
else:
|
909 |
-
self.preprocessor.load(preprocessor_name)
|
910 |
-
control_image = self.preprocessor(
|
911 |
-
image=image,
|
912 |
-
image_resolution=image_resolution,
|
913 |
-
)
|
914 |
-
self.load_controlnet_weight("shuffle")
|
915 |
-
results = self.run_pipe(
|
916 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
917 |
-
negative_prompt=negative_prompt,
|
918 |
-
control_image=control_image,
|
919 |
-
num_images=num_images,
|
920 |
-
num_steps=num_steps,
|
921 |
-
guidance_scale=guidance_scale,
|
922 |
-
seed=seed,
|
923 |
-
)
|
924 |
-
return [control_image, *results]
|
925 |
-
|
926 |
-
@torch.inference_mode()
|
927 |
-
def process_ip2p(
|
928 |
-
self,
|
929 |
-
image: np.ndarray,
|
930 |
-
prompt: str,
|
931 |
-
additional_prompt: str,
|
932 |
-
negative_prompt: str,
|
933 |
-
num_images: int,
|
934 |
-
image_resolution: int,
|
935 |
-
num_steps: int,
|
936 |
-
guidance_scale: float,
|
937 |
-
seed: int,
|
938 |
-
) -> list[PIL.Image.Image]:
|
939 |
-
if image is None:
|
940 |
-
raise ValueError
|
941 |
-
if image_resolution > MAX_IMAGE_RESOLUTION:
|
942 |
-
raise ValueError
|
943 |
-
if num_images > MAX_NUM_IMAGES:
|
944 |
-
raise ValueError
|
945 |
-
|
946 |
-
image = HWC3(image)
|
947 |
-
image = resize_image(image, resolution=image_resolution)
|
948 |
-
control_image = PIL.Image.fromarray(image)
|
949 |
-
self.load_controlnet_weight("ip2p")
|
950 |
-
results = self.run_pipe(
|
951 |
-
prompt=self.get_prompt(prompt, additional_prompt),
|
952 |
-
negative_prompt=negative_prompt,
|
953 |
-
control_image=control_image,
|
954 |
-
num_images=num_images,
|
955 |
-
num_steps=num_steps,
|
956 |
-
guidance_scale=guidance_scale,
|
957 |
-
seed=seed,
|
958 |
-
)
|
959 |
-
return [control_image, *results]
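Note: every `process_*` handler removed above follows the same flow — validate inputs, build a control image with `self.preprocessor`, load the matching ControlNet weight, then call `self.run_pipe`. A minimal driver for the deleted `process_canny` signature is sketched below; the `model` instance and the example values are assumptions for illustration, not part of this commit.

    import numpy as np
    import PIL.Image

    # Hypothetical usage of the deleted handler; `model` is an instance of the
    # class defined earlier in model.py (its constructor is not in this hunk).
    image = np.asarray(PIL.Image.open("examples/bunny/frame_0001.png").convert("RGB"))
    outputs = model.process_canny(
        image=image,
        prompt="a ceramic bunny",
        additional_prompt="best quality, extremely detailed",
        negative_prompt="lowres, bad anatomy, worst quality",
        num_images=1,
        image_resolution=512,
        num_steps=20,
        guidance_scale=9.0,
        seed=0,
        low_threshold=100,
        high_threshold=200,
    )
    # outputs[0] is the Canny control image, outputs[1:] are the generated images.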
pre-requirements.txt
DELETED
@@ -1,9 +0,0 @@
-accelerate
-diffusers
-invisible_watermark
-torch
-torchvision
-transformers
-xformers
-controlnet-aux # for controlnet
-spaces # no need to specify here
preprocessor.py
DELETED
@@ -1,120 +0,0 @@
-import gc
-from typing import TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from collections.abc import Callable
-
-import numpy as np
-import PIL.Image
-import torch
-from controlnet_aux import (
-    CannyDetector,
-    ContentShuffleDetector,
-    HEDdetector,
-    LineartAnimeDetector,
-    LineartDetector,
-    MidasDetector,
-    MLSDdetector,
-    NormalBaeDetector,
-    OpenposeDetector,
-    PidiNetDetector,
-)
-from controlnet_aux.util import HWC3
-
-from cv_utils import resize_image
-from depth_estimator import DepthEstimator
-from image_segmentor import ImageSegmentor
-
-
-class Preprocessor:
-    MODEL_ID = "lllyasviel/Annotators"
-
-    def __init__(self) -> None:
-        self.model: Callable = None  # type: ignore
-        self.name = ""
-
-    def load(self, name: str) -> None:  # noqa: C901, PLR0912
-        if name == self.name:
-            return
-        if name == "HED":
-            self.model = HEDdetector.from_pretrained(self.MODEL_ID)
-        elif name == "Midas":
-            self.model = MidasDetector.from_pretrained(self.MODEL_ID)
-        elif name == "MLSD":
-            self.model = MLSDdetector.from_pretrained(self.MODEL_ID)
-        elif name == "Openpose":
-            self.model = OpenposeDetector.from_pretrained(self.MODEL_ID)
-        elif name == "PidiNet":
-            self.model = PidiNetDetector.from_pretrained(self.MODEL_ID)
-        elif name == "NormalBae":
-            self.model = NormalBaeDetector.from_pretrained(self.MODEL_ID)
-        elif name == "Lineart":
-            self.model = LineartDetector.from_pretrained(self.MODEL_ID)
-        elif name == "LineartAnime":
-            self.model = LineartAnimeDetector.from_pretrained(self.MODEL_ID)
-        elif name == "Canny":
-            self.model = CannyDetector()
-        elif name == "ContentShuffle":
-            self.model = ContentShuffleDetector()
-        elif name == "DPT":
-            self.model = DepthEstimator()
-        elif name == "UPerNet":
-            self.model = ImageSegmentor()
-        elif name == 'texnet':
-            self.model = TexnetPreprocessor()
-        else:
-            raise ValueError
-        torch.cuda.empty_cache()
-        gc.collect()
-        self.name = name
-
-    def __call__(self, image: PIL.Image.Image, **kwargs) -> PIL.Image.Image:  # noqa: ANN003
-        if self.name == "Canny":
-            if "detect_resolution" in kwargs:
-                detect_resolution = kwargs.pop("detect_resolution")
-                image = np.array(image)
-                image = HWC3(image)
-                image = resize_image(image, resolution=detect_resolution)
-            image = self.model(image, **kwargs)
-            return PIL.Image.fromarray(image)
-        if self.name == "Midas":
-            detect_resolution = kwargs.pop("detect_resolution", 512)
-            image_resolution = kwargs.pop("image_resolution", 512)
-            image = np.array(image)
-            image = HWC3(image)
-            image = resize_image(image, resolution=detect_resolution)
-            image = self.model(image, **kwargs)
-            image = HWC3(image)
-            image = resize_image(image, resolution=image_resolution)
-            return PIL.Image.fromarray(image)
-        return self.model(image, **kwargs)
-
-
-# https://github.com/huggingface/controlnet_aux/blob/master/src/controlnet_aux/canny/__init__.py
-class TexnetPreprocessor:
-    def __call__(self, input_image=None, low_threshold=100, high_threshold=200, image_resolution=512, output_type=None, **kwargs):
-        if "img" in kwargs:
-            warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
-            input_image = kwargs.pop("img")
-
-        if input_image is None:
-            raise ValueError("input_image must be defined.")
-
-        if not isinstance(input_image, np.ndarray):
-            input_image = np.array(input_image, dtype=np.uint8)
-            output_type = output_type or "pil"
-        else:
-            output_type = output_type or "np"
-
-        input_image = HWC3(input_image)
-        input_image = resize_image(input_image, image_resolution)
-        H, W, C = input_image.shape
-
-        # detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
-        output_image = input_image.copy()
-
-        if output_type == "pil":
-            # detected_map = Image.fromarray(detected_map)
-            output_image = PIL.Image.fromarray(output_image)
-
-        return output_image
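Note: the deleted model.py drives this class as `self.preprocessor.load(...)` followed by a call. A standalone sketch against the module as it existed before this commit (the example image path is an assumption):

    import PIL.Image
    from preprocessor import Preprocessor

    pre = Preprocessor()
    pre.load("Canny")  # swaps in a CannyDetector and frees the previously loaded model
    control = pre(
        PIL.Image.open("examples/bunny/frame_0001.png").convert("RGB"),
        low_threshold=100,
        high_threshold=200,
        detect_resolution=512,
    )  # returns a PIL.Image edge map used as the ControlNet condition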
push_dataset.py
DELETED
@@ -1,9 +0,0 @@
-from huggingface_hub import HfApi
-api = HfApi()
-
-api.upload_folder(
-    folder_path="./examples",
-    repo_id="jingyangcarl/matgen",
-    repo_type="space",
-    path_in_repo="examples", # Upload to a specific folder
-)
requirements.txt
DELETED
@@ -1,9 +0,0 @@
-torch
-torchvision
-pytorch3d @ git+https://github.com/facebookresearch/pytorch3d.git@stable
-trimesh
-xatlas
-scikit-learn
-opencv-python
-matplotlib
-omegaconf
rgb2x/generate_blend.py
DELETED
@@ -1,142 +0,0 @@
-import bpy
-import sys
-import os
-
-def create_tex_node(nodes, img_path, label, color_space, location):
-    img = bpy.data.images.load(img_path)
-    tex = nodes.new(type='ShaderNodeTexImage')
-    tex.image = img
-    tex.label = label
-    tex.location = location
-    tex.image.colorspace_settings.name = color_space
-    return tex
-
-def setup_environment_lighting(hdri_path):
-    if not bpy.data.worlds:
-        bpy.data.worlds.new(name="World")
-    if bpy.context.scene.world is None:
-        bpy.context.scene.world = bpy.data.worlds[0]
-    world = bpy.context.scene.world
-
-    world.use_nodes = True
-    nodes = world.node_tree.nodes
-    links = world.node_tree.links
-    nodes.clear()
-
-    env_tex = nodes.new(type="ShaderNodeTexEnvironment")
-    env_tex.image = bpy.data.images.load(hdri_path)
-    env_tex.location = (-300, 0)
-
-    bg = nodes.new(type="ShaderNodeBackground")
-    bg.location = (0, 0)
-
-    output = nodes.new(type="ShaderNodeOutputWorld")
-    output.location = (300, 0)
-
-    links.new(env_tex.outputs["Color"], bg.inputs["Color"])
-    links.new(bg.outputs["Background"], output.inputs["Surface"])
-
-def setup_gpu_rendering():
-    bpy.context.scene.render.engine = 'CYCLES'
-    prefs = bpy.context.preferences
-    cprefs = prefs.addons['cycles'].preferences
-
-    # Choose backend depending on GPU type: 'CUDA', 'OPTIX', 'HIP', 'METAL'
-    cprefs.compute_device_type = 'CUDA'
-    bpy.context.scene.cycles.device = 'GPU'
-
-def generate_blend(obj_path, base_color_path, normal_map_path, roughness_path, metallic_path, output_blend):
-    # Reset scene
-    bpy.ops.wm.read_factory_settings(use_empty=True)
-
-    # Import OBJ
-    bpy.ops.import_scene.obj(filepath=obj_path)
-    obj = bpy.context.selected_objects[0]
-
-    # Create material
-    mat = bpy.data.materials.new(name="BRDF_Material")
-    mat.use_nodes = True
-    nodes = mat.node_tree.nodes
-    links = mat.node_tree.links
-    nodes.clear()
-
-    output = nodes.new(type='ShaderNodeOutputMaterial')
-    output.location = (400, 0)
-
-    principled = nodes.new(type='ShaderNodeBsdfPrincipled')
-    principled.location = (100, 0)
-    links.new(principled.outputs['BSDF'], output.inputs['Surface'])
-
-    # Base Color
-    base_color = create_tex_node(nodes, base_color_path, "Base Color", 'sRGB', (-600, 200))
-    links.new(base_color.outputs['Color'], principled.inputs['Base Color'])
-
-    # Roughness
-    rough = create_tex_node(nodes, roughness_path, "Roughness", 'Non-Color', (-600, 0))
-    links.new(rough.outputs['Color'], principled.inputs['Roughness'])
-
-    # Metallic
-    metal = create_tex_node(nodes, metallic_path, "Metallic", 'Non-Color', (-600, -200))
-    links.new(metal.outputs['Color'], principled.inputs['Metallic'])
-
-    # Normal Map
-    normal_tex = create_tex_node(nodes, normal_map_path, "Normal Map", 'Non-Color', (-800, -400))
-    normal_map = nodes.new(type='ShaderNodeNormalMap')
-    normal_map.location = (-400, -400)
-    links.new(normal_tex.outputs['Color'], normal_map.inputs['Color'])
-    links.new(normal_map.outputs['Normal'], principled.inputs['Normal'])
-
-    # Assign material
-    if obj.data.materials:
-        obj.data.materials[0] = mat
-    else:
-        obj.data.materials.append(mat)
-
-    # Global Illumination using Blender's default forest HDRI
-    blender_data_path = bpy.utils.resource_path('LOCAL')
-    forest_hdri_path = os.path.join(blender_data_path, "datafiles", "studiolights", "world", "forest.exr")
-    print(f"Using HDRI: {forest_hdri_path}")
-    setup_environment_lighting(forest_hdri_path)
-
-    # GPU rendering setup
-    setup_gpu_rendering()
-
-    # Pack textures into .blend
-    bpy.ops.file.pack_all()
-
-    # Set the 3D View to Rendered mode and focus on object
-    for area in bpy.context.screen.areas:
-        if area.type == 'VIEW_3D':
-            for space in area.spaces:
-                if space.type == 'VIEW_3D':
-                    space.shading.type = 'RENDERED' # Set viewport shading to Rendered
-            for region in area.regions:
-                if region.type == 'WINDOW':
-                    override = {'area': area, 'region': region, 'scene': bpy.context.scene}
-                    bpy.ops.view3d.view_all(override, center=True)
-
-        elif area.type == 'NODE_EDITOR':
-            for space in area.spaces:
-                if space.type == 'NODE_EDITOR':
-                    space.tree_type = 'ShaderNodeTree' # Switch to Shader Editor
-                    space.shader_type = 'OBJECT'
-
-    # Optional: Switch active workspace to Shading (if it exists)
-    for workspace in bpy.data.workspaces:
-        if workspace.name == 'Shading':
-            bpy.context.window.workspace = workspace
-            break
-
-    # Save the .blend file
-    bpy.ops.wm.save_as_mainfile(filepath=output_blend)
-    print(f"✅ Saved .blend file with BRDF, HDRI, GPU: {output_blend}")
-
-if __name__ == "__main__":
-    argv = sys.argv
-    argv = argv[argv.index("--") + 1:] # Only use args after "--"
-
-    if len(argv) != 6:
-        print("Usage:\n blender --background --python generate_blend.py -- obj base_color normal roughness metallic output.blend")
-        sys.exit(1)
-
-    generate_blend(*argv)
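Note: the deleted model.py calls a `run_blend_generation(...)` helper with a `blender_path` and this script, but that helper's body is not part of this diff. Based on the usage string above, it presumably shells out to Blender roughly as in this hypothetical sketch:

    import subprocess

    def run_blend_generation(blender_path, generate_script_path, obj_path, base_color_path,
                             normal_map_path, roughness_path, metallic_path, output_blend):
        # Blender forwards only the CLI arguments after "--" to the Python script.
        subprocess.run(
            [
                blender_path, "--background", "--python", generate_script_path, "--",
                obj_path, base_color_path, normal_map_path, roughness_path,
                metallic_path, output_blend,
            ],
            check=True,
        )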
rgb2x/gradio_demo_rgb2x.py
DELETED
@@ -1,157 +0,0 @@
-import os
-
-os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
-
-import gradio as gr
-import torch
-import torchvision
-from diffusers import DDIMScheduler
-from load_image import load_exr_image, load_ldr_image
-from pipeline_rgb2x import StableDiffusionAOVMatEstPipeline
-
-current_directory = os.path.dirname(os.path.abspath(__file__))
-
-
-def get_rgb2x_demo():
-    # Load pipeline
-    pipe = StableDiffusionAOVMatEstPipeline.from_pretrained(
-        "zheng95z/rgb-to-x",
-        torch_dtype=torch.float16,
-        cache_dir=os.path.join(current_directory, "model_cache"),
-    ).to("cuda")
-    pipe.scheduler = DDIMScheduler.from_config(
-        pipe.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
-    )
-    pipe.set_progress_bar_config(disable=True)
-    pipe.to("cuda")
-
-    # Augmentation
-    def callback(
-        photo,
-        seed,
-        inference_step,
-        num_samples,
-    ):
-        generator = torch.Generator(device="cuda").manual_seed(seed)
-
-        if photo.name.endswith(".exr"):
-            photo = load_exr_image(photo.name, tonemaping=True, clamp=True).to("cuda")
-        elif (
-            photo.name.endswith(".png")
-            or photo.name.endswith(".jpg")
-            or photo.name.endswith(".jpeg")
-        ):
-            photo = load_ldr_image(photo.name, from_srgb=True).to("cuda")
-
-        # Check if the width and height are multiples of 8. If not, crop it using torchvision.transforms.CenterCrop
-        old_height = photo.shape[1]
-        old_width = photo.shape[2]
-        new_height = old_height
-        new_width = old_width
-        radio = old_height / old_width
-        max_side = 1000
-        if old_height > old_width:
-            new_height = max_side
-            new_width = int(new_height / radio)
-        else:
-            new_width = max_side
-            new_height = int(new_width * radio)
-
-        if new_width % 8 != 0 or new_height % 8 != 0:
-            new_width = new_width // 8 * 8
-            new_height = new_height // 8 * 8
-
-        photo = torchvision.transforms.Resize((new_height, new_width))(photo)
-
-        required_aovs = ["albedo", "normal", "roughness", "metallic", "irradiance"]
-        prompts = {
-            "albedo": "Albedo (diffuse basecolor)",
-            "normal": "Camera-space Normal",
-            "roughness": "Roughness",
-            "metallic": "Metallicness",
-            "irradiance": "Irradiance (diffuse lighting)",
-        }
-
-        return_list = []
-        for i in range(num_samples):
-            for aov_name in required_aovs:
-                prompt = prompts[aov_name]
-                generated_image = pipe(
-                    prompt=prompt,
-                    photo=photo,
-                    num_inference_steps=inference_step,
-                    height=new_height,
-                    width=new_width,
-                    generator=generator,
-                    required_aovs=[aov_name],
-                ).images[0][0]
-
-                generated_image = torchvision.transforms.Resize(
-                    (old_height, old_width)
-                )(generated_image)
-
-                generated_image = (generated_image, f"Generated {aov_name} {i}")
-                return_list.append(generated_image)
-
-        return return_list
-
-    block = gr.Blocks()
-    with block:
-        with gr.Row():
-            gr.Markdown("## Model RGB -> X (Realistic image -> Intrinsic channels)")
-        with gr.Row():
-            # Input side
-            with gr.Column():
-                gr.Markdown("### Given Image")
-                photo = gr.File(label="Photo", file_types=[".exr", ".png", ".jpg"])
-
-                gr.Markdown("### Parameters")
-                run_button = gr.Button(value="Run")
-                with gr.Accordion("Advanced options", open=False):
-                    seed = gr.Slider(
-                        label="Seed",
-                        minimum=-1,
-                        maximum=2147483647,
-                        step=1,
-                        randomize=True,
-                    )
-                    inference_step = gr.Slider(
-                        label="Inference Step",
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=50,
-                    )
-                    num_samples = gr.Slider(
-                        label="Samples",
-                        minimum=1,
-                        maximum=100,
-                        step=1,
-                        value=1,
-                    )
-
-            # Output side
-            with gr.Column():
-                gr.Markdown("### Output Gallery")
-                result_gallery = gr.Gallery(
-                    label="Output",
-                    show_label=False,
-                    elem_id="gallery",
-                    columns=2,
-                )
-
-        inputs = [
-            photo,
-            seed,
-            inference_step,
-            num_samples,
-        ]
-        run_button.click(fn=callback, inputs=inputs, outputs=result_gallery, queue=True)
-
-    return block
-
-
-if __name__ == "__main__":
-    demo = get_rgb2x_demo()
-    demo.queue(max_size=1)
-    demo.launch()
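Note: `get_rgb2x_demo()` returns a `gr.Blocks`, so besides the standalone launch above it can also be mounted inside a larger app. A minimal sketch (the tab layout is an assumption, not part of this commit):

    import gradio as gr
    from rgb2x.gradio_demo_rgb2x import get_rgb2x_demo

    # Hypothetical composition: expose the RGB->X demo as one tab of a bigger Space.
    app = gr.TabbedInterface([get_rgb2x_demo()], ["RGB -> X"])
    app.queue(max_size=1)
    app.launch()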
rgb2x/load_image.py
DELETED
@@ -1,119 +0,0 @@
-import os
-
-import cv2
-import torch
-
-os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
-import numpy as np
-
-
-def convert_rgb_2_XYZ(rgb):
-    # Reference: https://web.archive.org/web/20191027010220/http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
-    # rgb: (h, w, 3)
-    # XYZ: (h, w, 3)
-    XYZ = torch.ones_like(rgb)
-    XYZ[:, :, 0] = (
-        0.4124564 * rgb[:, :, 0] + 0.3575761 * rgb[:, :, 1] + 0.1804375 * rgb[:, :, 2]
-    )
-    XYZ[:, :, 1] = (
-        0.2126729 * rgb[:, :, 0] + 0.7151522 * rgb[:, :, 1] + 0.0721750 * rgb[:, :, 2]
-    )
-    XYZ[:, :, 2] = (
-        0.0193339 * rgb[:, :, 0] + 0.1191920 * rgb[:, :, 1] + 0.9503041 * rgb[:, :, 2]
-    )
-    return XYZ
-
-
-def convert_XYZ_2_Yxy(XYZ):
-    # XYZ: (h, w, 3)
-    # Yxy: (h, w, 3)
-    Yxy = torch.ones_like(XYZ)
-    Yxy[:, :, 0] = XYZ[:, :, 1]
-    sum = torch.sum(XYZ, dim=2)
-    inv_sum = 1.0 / torch.clamp(sum, min=1e-4)
-    Yxy[:, :, 1] = XYZ[:, :, 0] * inv_sum
-    Yxy[:, :, 2] = XYZ[:, :, 1] * inv_sum
-    return Yxy
-
-
-def convert_rgb_2_Yxy(rgb):
-    # rgb: (h, w, 3)
-    # Yxy: (h, w, 3)
-    return convert_XYZ_2_Yxy(convert_rgb_2_XYZ(rgb))
-
-
-def convert_XYZ_2_rgb(XYZ):
-    # XYZ: (h, w, 3)
-    # rgb: (h, w, 3)
-    rgb = torch.ones_like(XYZ)
-    rgb[:, :, 0] = (
-        3.2404542 * XYZ[:, :, 0] - 1.5371385 * XYZ[:, :, 1] - 0.4985314 * XYZ[:, :, 2]
-    )
-    rgb[:, :, 1] = (
-        -0.9692660 * XYZ[:, :, 0] + 1.8760108 * XYZ[:, :, 1] + 0.0415560 * XYZ[:, :, 2]
-    )
-    rgb[:, :, 2] = (
-        0.0556434 * XYZ[:, :, 0] - 0.2040259 * XYZ[:, :, 1] + 1.0572252 * XYZ[:, :, 2]
-    )
-    return rgb
-
-
-def convert_Yxy_2_XYZ(Yxy):
-    # Yxy: (h, w, 3)
-    # XYZ: (h, w, 3)
-    XYZ = torch.ones_like(Yxy)
-    XYZ[:, :, 0] = Yxy[:, :, 1] / torch.clamp(Yxy[:, :, 2], min=1e-6) * Yxy[:, :, 0]
-    XYZ[:, :, 1] = Yxy[:, :, 0]
-    XYZ[:, :, 2] = (
-        (1.0 - Yxy[:, :, 1] - Yxy[:, :, 2])
-        / torch.clamp(Yxy[:, :, 2], min=1e-4)
-        * Yxy[:, :, 0]
-    )
-    return XYZ
-
-
-def convert_Yxy_2_rgb(Yxy):
-    # Yxy: (h, w, 3)
-    # rgb: (h, w, 3)
-    return convert_XYZ_2_rgb(convert_Yxy_2_XYZ(Yxy))
-
-
-def load_ldr_image(image_path, from_srgb=False, clamp=False, normalize=False):
-    # Load png or jpg image
-    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
-    image = torch.from_numpy(image.astype(np.float32) / 255.0) # (h, w, c)
-    image[~torch.isfinite(image)] = 0
-    if from_srgb:
-        # Convert from sRGB to linear RGB
-        image = image**2.2
-    if clamp:
-        image = torch.clamp(image, min=0.0, max=1.0)
-    if normalize:
-        # Normalize to [-1, 1]
-        image = image * 2.0 - 1.0
-        image = torch.nn.functional.normalize(image, dim=-1, eps=1e-6)
-    return image.permute(2, 0, 1) # returns (c, h, w)
-
-
-def load_exr_image(image_path, tonemaping=False, clamp=False, normalize=False):
-    image = cv2.cvtColor(cv2.imread(image_path, -1), cv2.COLOR_BGR2RGB)
-    image = torch.from_numpy(image.astype("float32")) # (h, w, c)
-    image[~torch.isfinite(image)] = 0
-    if tonemaping:
-        # Exposure adjuestment
-        image_Yxy = convert_rgb_2_Yxy(image)
-        lum = (
-            image[:, :, 0:1] * 0.2125
-            + image[:, :, 1:2] * 0.7154
-            + image[:, :, 2:3] * 0.0721
-        )
-        lum = torch.log(torch.clamp(lum, min=1e-6))
-        lum_mean = torch.exp(torch.mean(lum))
-        lp = image_Yxy[:, :, 0:1] * 0.18 / torch.clamp(lum_mean, min=1e-6)
-        image_Yxy[:, :, 0:1] = lp
-        image = convert_Yxy_2_rgb(image_Yxy)
-    if clamp:
-        image = torch.clamp(image, min=0.0, max=1.0)
-    if normalize:
-        image = torch.nn.functional.normalize(image, dim=-1, eps=1e-6)
-    return image.permute(2, 0, 1) # returns (c, h, w)
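Note: these loaders are what gradio_demo_rgb2x.py above feeds to the pipeline. A quick standalone sketch (file paths are assumptions):

    from rgb2x.load_image import load_exr_image, load_ldr_image

    # LDR input: sRGB PNG/JPG, converted to approximately linear RGB via gamma 2.2, shape (3, H, W)
    photo = load_ldr_image("input.png", from_srgb=True).to("cuda")
    # HDR input: EXR with log-average exposure adjustment ("tonemaping"), then clamped to [0, 1]
    hdr = load_exr_image("input.exr", tonemaping=True, clamp=True).to("cuda")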
rgb2x/pipeline_rgb2x.py
DELETED
@@ -1,821 +0,0 @@
-import inspect
-from dataclasses import dataclass
-from typing import Callable, List, Optional, Union
-
-import numpy as np
-import PIL
-import torch
-from diffusers.configuration_utils import register_to_config
-from diffusers.image_processor import VaeImageProcessor
-from diffusers.loaders import (
-    LoraLoaderMixin,
-    TextualInversionLoaderMixin,
-)
-from diffusers.models import AutoencoderKL, UNet2DConditionModel
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline
-from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import (
-    rescale_noise_cfg,
-)
-from diffusers.schedulers import KarrasDiffusionSchedulers
-from diffusers.utils import (
-    CONFIG_NAME,
-    BaseOutput,
-    deprecate,
-    logging,
-)
-from diffusers.utils.torch_utils import randn_tensor
-from transformers import CLIPTextModel, CLIPTokenizer
-
-logger = logging.get_logger(__name__)
-
-
-class VaeImageProcrssorAOV(VaeImageProcessor):
-    """
-    Image processor for VAE AOV.
-
-    Args:
-        do_resize (`bool`, *optional*, defaults to `True`):
-            Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`.
-        vae_scale_factor (`int`, *optional*, defaults to `8`):
-            VAE scale factor. If `do_resize` is `True`, the image is automatically resized to multiples of this factor.
-        resample (`str`, *optional*, defaults to `lanczos`):
-            Resampling filter to use when resizing the image.
-        do_normalize (`bool`, *optional*, defaults to `True`):
-            Whether to normalize the image to [-1,1].
-    """
-
-    config_name = CONFIG_NAME
-
-    @register_to_config
-    def __init__(
-        self,
-        do_resize: bool = True,
-        vae_scale_factor: int = 8,
-        resample: str = "lanczos",
-        do_normalize: bool = True,
-    ):
-        super().__init__()
-
-    def postprocess(
-        self,
-        image: torch.FloatTensor,
-        output_type: str = "pil",
-        do_denormalize: Optional[List[bool]] = None,
-        do_gamma_correction: bool = True,
-    ):
-        if not isinstance(image, torch.Tensor):
-            raise ValueError(
-                f"Input for postprocessing is in incorrect format: {type(image)}. We only support pytorch tensor"
-            )
-        if output_type not in ["latent", "pt", "np", "pil"]:
-            deprecation_message = (
-                f"the output_type {output_type} is outdated and has been set to `np`. Please make sure to set it to one of these instead: "
-                "`pil`, `np`, `pt`, `latent`"
-            )
-            deprecate(
-                "Unsupported output_type",
-                "1.0.0",
-                deprecation_message,
-                standard_warn=False,
-            )
-            output_type = "np"
-
-        if output_type == "latent":
-            return image
-
-        if do_denormalize is None:
-            do_denormalize = [self.config.do_normalize] * image.shape[0]
-
-        image = torch.stack(
-            [
-                self.denormalize(image[i]) if do_denormalize[i] else image[i]
-                for i in range(image.shape[0])
-            ]
-        )
-
-        # Gamma correction
-        if do_gamma_correction:
-            image = torch.pow(image, 1.0 / 2.2)
-
-        if output_type == "pt":
-            return image
-
-        image = self.pt_to_numpy(image)
-
-        if output_type == "np":
-            return image
-
-        if output_type == "pil":
-            return self.numpy_to_pil(image)
-
-    def preprocess_normal(
-        self,
-        image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray],
-        height: Optional[int] = None,
-        width: Optional[int] = None,
-    ) -> torch.Tensor:
-        image = torch.stack([image], axis=0)
-        return image
-
-
-@dataclass
-class StableDiffusionAOVPipelineOutput(BaseOutput):
-    """
-    Output class for Stable Diffusion AOV pipelines.
-
-    Args:
-        images (`List[PIL.Image.Image]` or `np.ndarray`)
-            List of denoised PIL images of length `batch_size` or NumPy array of shape `(batch_size, height, width,
-            num_channels)`.
-        nsfw_content_detected (`List[bool]`)
-            List indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content or
-            `None` if safety checking could not be performed.
-    """
-
-    images: Union[List[PIL.Image.Image], np.ndarray]
-
-
-class StableDiffusionAOVMatEstPipeline(
-    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin
-):
-    r"""
-    Pipeline for AOVs.
-
-    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
-    implemented for all pipelines (downloading, saving, running on a particular device, etc.).
-
-    The pipeline also inherits the following loading methods:
-        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
-
-    Args:
-        vae ([`AutoencoderKL`]):
-            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
-        text_encoder ([`~transformers.CLIPTextModel`]):
-            Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
-        tokenizer ([`~transformers.CLIPTokenizer`]):
-            A `CLIPTokenizer` to tokenize text.
-        unet ([`UNet2DConditionModel`]):
-            A `UNet2DConditionModel` to denoise the encoded image latents.
-        scheduler ([`SchedulerMixin`]):
-            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
-            [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
-    """
-
-    def __init__(
-        self,
-        vae: AutoencoderKL,
-        text_encoder: CLIPTextModel,
-        tokenizer: CLIPTokenizer,
-        unet: UNet2DConditionModel,
-        scheduler: KarrasDiffusionSchedulers,
-    ):
-        super().__init__()
-
-        self.register_modules(
-            vae=vae,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-        )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
-        self.image_processor = VaeImageProcrssorAOV(
-            vae_scale_factor=self.vae_scale_factor
-        )
-        self.register_to_config()
-
-    def _encode_prompt(
-        self,
-        prompt,
-        device,
-        num_images_per_prompt,
-        do_classifier_free_guidance,
-        negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-    ):
-        r"""
-        Encodes the prompt into text encoder hidden states.
-
-        Args:
-            prompt (`str` or `List[str]`, *optional*):
-                prompt to be encoded
-            device: (`torch.device`):
-                torch device
-            num_images_per_prompt (`int`):
-                number of images that should be generated per prompt
-            do_classifier_free_guidance (`bool`):
-                whether to use classifier free guidance or not
-            negative_ prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined, one has to pass
-                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
-                less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
-                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
-                provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
-                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
-                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
-                argument.
-        """
-        if prompt is not None and isinstance(prompt, str):
-            batch_size = 1
-        elif prompt is not None and isinstance(prompt, list):
-            batch_size = len(prompt)
-        else:
-            batch_size = prompt_embeds.shape[0]
-
-        if prompt_embeds is None:
-            # textual inversion: procecss multi-vector tokens if necessary
-            if isinstance(self, TextualInversionLoaderMixin):
-                prompt = self.maybe_convert_prompt(prompt, self.tokenizer)
-
-            text_inputs = self.tokenizer(
-                prompt,
-                padding="max_length",
-                max_length=self.tokenizer.model_max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-            text_input_ids = text_inputs.input_ids
-            untruncated_ids = self.tokenizer(
-                prompt, padding="longest", return_tensors="pt"
-            ).input_ids
-
-            if untruncated_ids.shape[-1] >= text_input_ids.shape[
-                -1
-            ] and not torch.equal(text_input_ids, untruncated_ids):
-                removed_text = self.tokenizer.batch_decode(
-                    untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1]
-                )
-                logger.warning(
-                    "The following part of your input was truncated because CLIP can only handle sequences up to"
-                    f" {self.tokenizer.model_max_length} tokens: {removed_text}"
-                )
-
-            if (
-                hasattr(self.text_encoder.config, "use_attention_mask")
-                and self.text_encoder.config.use_attention_mask
-            ):
-                attention_mask = text_inputs.attention_mask.to(device)
-            else:
-                attention_mask = None
-
-            prompt_embeds = self.text_encoder(
-                text_input_ids.to(device),
-                attention_mask=attention_mask,
-            )
-            prompt_embeds = prompt_embeds[0]
-
-        prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
-
-        bs_embed, seq_len, _ = prompt_embeds.shape
-        # duplicate text embeddings for each generation per prompt, using mps friendly method
-        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
-        prompt_embeds = prompt_embeds.view(
-            bs_embed * num_images_per_prompt, seq_len, -1
-        )
-
-        # get unconditional embeddings for classifier free guidance
-        if do_classifier_free_guidance and negative_prompt_embeds is None:
-            uncond_tokens: List[str]
-            if negative_prompt is None:
-                uncond_tokens = [""] * batch_size
-            elif type(prompt) is not type(negative_prompt):
-                raise TypeError(
-                    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                    f" {type(prompt)}."
-                )
-            elif isinstance(negative_prompt, str):
-                uncond_tokens = [negative_prompt]
-            elif batch_size != len(negative_prompt):
-                raise ValueError(
-                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
-                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
-                    " the batch size of `prompt`."
-                )
-            else:
-                uncond_tokens = negative_prompt
-
-            # textual inversion: procecss multi-vector tokens if necessary
-            if isinstance(self, TextualInversionLoaderMixin):
-                uncond_tokens = self.maybe_convert_prompt(uncond_tokens, self.tokenizer)
-
-            max_length = prompt_embeds.shape[1]
-            uncond_input = self.tokenizer(
-                uncond_tokens,
-                padding="max_length",
-                max_length=max_length,
-                truncation=True,
-                return_tensors="pt",
-            )
-
-            if (
-                hasattr(self.text_encoder.config, "use_attention_mask")
-                and self.text_encoder.config.use_attention_mask
-            ):
-                attention_mask = uncond_input.attention_mask.to(device)
-            else:
-                attention_mask = None
-
-            negative_prompt_embeds = self.text_encoder(
-                uncond_input.input_ids.to(device),
-                attention_mask=attention_mask,
-            )
-            negative_prompt_embeds = negative_prompt_embeds[0]
-
-        if do_classifier_free_guidance:
-            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
-            seq_len = negative_prompt_embeds.shape[1]
-
-            negative_prompt_embeds = negative_prompt_embeds.to(
-                dtype=self.text_encoder.dtype, device=device
-            )
-
-            negative_prompt_embeds = negative_prompt_embeds.repeat(
-                1, num_images_per_prompt, 1
-            )
-            negative_prompt_embeds = negative_prompt_embeds.view(
-                batch_size * num_images_per_prompt, seq_len, -1
-            )
-
-            # For classifier free guidance, we need to do two forward passes.
-            # Here we concatenate the unconditional and text embeddings into a single batch
-            # to avoid doing two forward passes
-            # pix2pix has two negative embeddings, and unlike in other pipelines latents are ordered [prompt_embeds, negative_prompt_embeds, negative_prompt_embeds]
-            prompt_embeds = torch.cat(
-                [prompt_embeds, negative_prompt_embeds, negative_prompt_embeds]
-            )
-
-        return prompt_embeds
-
-    def prepare_extra_step_kwargs(self, generator, eta):
-        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
-        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
-        # and should be between [0, 1]
-
-        accepts_eta = "eta" in set(
-            inspect.signature(self.scheduler.step).parameters.keys()
-        )
-        extra_step_kwargs = {}
-        if accepts_eta:
-            extra_step_kwargs["eta"] = eta
-
-        # check if the scheduler accepts generator
-        accepts_generator = "generator" in set(
-            inspect.signature(self.scheduler.step).parameters.keys()
-        )
-        if accepts_generator:
-            extra_step_kwargs["generator"] = generator
-        return extra_step_kwargs
-
-    def check_inputs(
-        self,
-        prompt,
-        callback_steps,
-        negative_prompt=None,
-        prompt_embeds=None,
-        negative_prompt_embeds=None,
-    ):
-        if (callback_steps is None) or (
-            callback_steps is not None
-            and (not isinstance(callback_steps, int) or callback_steps <= 0)
-        ):
-            raise ValueError(
-                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
-                f" {type(callback_steps)}."
-            )
-
-        if prompt is not None and prompt_embeds is not None:
-            raise ValueError(
-                f"Cannot forward both `prompt`: {prompt} and `prompt_embeds`: {prompt_embeds}. Please make sure to"
-                " only forward one of the two."
-            )
-        elif prompt is None and prompt_embeds is None:
-            raise ValueError(
-                "Provide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined."
-            )
-        elif prompt is not None and (
-            not isinstance(prompt, str) and not isinstance(prompt, list)
-        ):
-            raise ValueError(
-                f"`prompt` has to be of type `str` or `list` but is {type(prompt)}"
-            )
-
-        if negative_prompt is not None and negative_prompt_embeds is not None:
-            raise ValueError(
-                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
-                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
-            )
-
-        if prompt_embeds is not None and negative_prompt_embeds is not None:
-            if prompt_embeds.shape != negative_prompt_embeds.shape:
-                raise ValueError(
-                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
-                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
-                    f" {negative_prompt_embeds.shape}."
-                )
-
-    def prepare_latents(
-        self,
-        batch_size,
-        num_channels_latents,
-        height,
-        width,
-        dtype,
-        device,
-        generator,
-        latents=None,
-    ):
-        shape = (
-            batch_size,
-            num_channels_latents,
-            height // self.vae_scale_factor,
-            width // self.vae_scale_factor,
-        )
-        if isinstance(generator, list) and len(generator) != batch_size:
-            raise ValueError(
-                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch
|
442 |
-
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
443 |
-
)
|
444 |
-
|
445 |
-
if latents is None:
|
446 |
-
latents = randn_tensor(
|
447 |
-
shape, generator=generator, device=device, dtype=dtype
|
448 |
-
)
|
449 |
-
else:
|
450 |
-
latents = latents.to(device)
|
451 |
-
|
452 |
-
# scale the initial noise by the standard deviation required by the scheduler
|
453 |
-
latents = latents * self.scheduler.init_noise_sigma
|
454 |
-
return latents
|
455 |
-
|
456 |
-
def prepare_image_latents(
|
457 |
-
self,
|
458 |
-
image,
|
459 |
-
batch_size,
|
460 |
-
num_images_per_prompt,
|
461 |
-
dtype,
|
462 |
-
device,
|
463 |
-
do_classifier_free_guidance,
|
464 |
-
generator=None,
|
465 |
-
):
|
466 |
-
if not isinstance(image, (torch.Tensor, PIL.Image.Image, list)):
|
467 |
-
raise ValueError(
|
468 |
-
f"`image` has to be of type `torch.Tensor`, `PIL.Image.Image` or list but is {type(image)}"
|
469 |
-
)
|
470 |
-
|
471 |
-
image = image.to(device=device, dtype=dtype)
|
472 |
-
|
473 |
-
batch_size = batch_size * num_images_per_prompt
|
474 |
-
|
475 |
-
if image.shape[1] == 4:
|
476 |
-
image_latents = image
|
477 |
-
else:
|
478 |
-
if isinstance(generator, list) and len(generator) != batch_size:
|
479 |
-
raise ValueError(
|
480 |
-
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
481 |
-
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
482 |
-
)
|
483 |
-
|
484 |
-
if isinstance(generator, list):
|
485 |
-
image_latents = [
|
486 |
-
self.vae.encode(image[i : i + 1]).latent_dist.mode()
|
487 |
-
for i in range(batch_size)
|
488 |
-
]
|
489 |
-
image_latents = torch.cat(image_latents, dim=0)
|
490 |
-
else:
|
491 |
-
image_latents = self.vae.encode(image).latent_dist.mode()
|
492 |
-
|
493 |
-
if (
|
494 |
-
batch_size > image_latents.shape[0]
|
495 |
-
and batch_size % image_latents.shape[0] == 0
|
496 |
-
):
|
497 |
-
# expand image_latents for batch_size
|
498 |
-
deprecation_message = (
|
499 |
-
f"You have passed {batch_size} text prompts (`prompt`), but only {image_latents.shape[0]} initial"
|
500 |
-
" images (`image`). Initial images are now duplicating to match the number of text prompts. Note"
|
501 |
-
" that this behavior is deprecated and will be removed in a version 1.0.0. Please make sure to update"
|
502 |
-
" your script to pass as many initial images as text prompts to suppress this warning."
|
503 |
-
)
|
504 |
-
deprecate(
|
505 |
-
"len(prompt) != len(image)",
|
506 |
-
"1.0.0",
|
507 |
-
deprecation_message,
|
508 |
-
standard_warn=False,
|
509 |
-
)
|
510 |
-
additional_image_per_prompt = batch_size // image_latents.shape[0]
|
511 |
-
image_latents = torch.cat(
|
512 |
-
[image_latents] * additional_image_per_prompt, dim=0
|
513 |
-
)
|
514 |
-
elif (
|
515 |
-
batch_size > image_latents.shape[0]
|
516 |
-
and batch_size % image_latents.shape[0] != 0
|
517 |
-
):
|
518 |
-
raise ValueError(
|
519 |
-
f"Cannot duplicate `image` of batch size {image_latents.shape[0]} to {batch_size} text prompts."
|
520 |
-
)
|
521 |
-
else:
|
522 |
-
image_latents = torch.cat([image_latents], dim=0)
|
523 |
-
|
524 |
-
if do_classifier_free_guidance:
|
525 |
-
uncond_image_latents = torch.zeros_like(image_latents)
|
526 |
-
image_latents = torch.cat(
|
527 |
-
[image_latents, image_latents, uncond_image_latents], dim=0
|
528 |
-
)
|
529 |
-
|
530 |
-
return image_latents
|
531 |
-
|
532 |
-
@torch.no_grad()
|
533 |
-
def __call__(
|
534 |
-
self,
|
535 |
-
prompt: Union[str, List[str]] = None,
|
536 |
-
photo: Union[
|
537 |
-
torch.FloatTensor,
|
538 |
-
PIL.Image.Image,
|
539 |
-
np.ndarray,
|
540 |
-
List[torch.FloatTensor],
|
541 |
-
List[PIL.Image.Image],
|
542 |
-
List[np.ndarray],
|
543 |
-
] = None,
|
544 |
-
height: Optional[int] = None,
|
545 |
-
width: Optional[int] = None,
|
546 |
-
num_inference_steps: int = 100,
|
547 |
-
required_aovs: List[str] = ["albedo"],
|
548 |
-
negative_prompt: Optional[Union[str, List[str]]] = None,
|
549 |
-
num_images_per_prompt: Optional[int] = 1,
|
550 |
-
use_default_scaling_factor: Optional[bool] = False,
|
551 |
-
guidance_scale: float = 0.0,
|
552 |
-
image_guidance_scale: float = 0.0,
|
553 |
-
guidance_rescale: float = 0.0,
|
554 |
-
eta: float = 0.0,
|
555 |
-
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
|
556 |
-
latents: Optional[torch.FloatTensor] = None,
|
557 |
-
prompt_embeds: Optional[torch.FloatTensor] = None,
|
558 |
-
negative_prompt_embeds: Optional[torch.FloatTensor] = None,
|
559 |
-
output_type: Optional[str] = "pil",
|
560 |
-
return_dict: bool = True,
|
561 |
-
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
|
562 |
-
callback_steps: int = 1,
|
563 |
-
):
|
564 |
-
r"""
|
565 |
-
The call function to the pipeline for generation.
|
566 |
-
|
567 |
-
Args:
|
568 |
-
prompt (`str` or `List[str]`, *optional*):
|
569 |
-
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
|
570 |
-
image (`torch.FloatTensor` `np.ndarray`, `PIL.Image.Image`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
|
571 |
-
`Image` or tensor representing an image batch to be repainted according to `prompt`. Can also accept
|
572 |
-
image latents as `image`, but if passing latents directly it is not encoded again.
|
573 |
-
num_inference_steps (`int`, *optional*, defaults to 100):
|
574 |
-
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
575 |
-
expense of slower inference.
|
576 |
-
guidance_scale (`float`, *optional*, defaults to 7.5):
|
577 |
-
A higher guidance scale value encourages the model to generate images closely linked to the text
|
578 |
-
`prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
|
579 |
-
image_guidance_scale (`float`, *optional*, defaults to 1.5):
|
580 |
-
Push the generated image towards the inital `image`. Image guidance scale is enabled by setting
|
581 |
-
`image_guidance_scale > 1`. Higher image guidance scale encourages generated images that are closely
|
582 |
-
linked to the source `image`, usually at the expense of lower image quality. This pipeline requires a
|
583 |
-
value of at least `1`.
|
584 |
-
negative_prompt (`str` or `List[str]`, *optional*):
|
585 |
-
The prompt or prompts to guide what to not include in image generation. If not defined, you need to
|
586 |
-
pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
|
587 |
-
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
588 |
-
The number of images to generate per prompt.
|
589 |
-
eta (`float`, *optional*, defaults to 0.0):
|
590 |
-
Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
|
591 |
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
592 |
-
generator (`torch.Generator`, *optional*):
|
593 |
-
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
594 |
-
generation deterministic.
|
595 |
-
latents (`torch.FloatTensor`, *optional*):
|
596 |
-
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
|
597 |
-
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
598 |
-
tensor is generated by sampling using the supplied random `generator`.
|
599 |
-
prompt_embeds (`torch.FloatTensor`, *optional*):
|
600 |
-
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
|
601 |
-
provided, text embeddings are generated from the `prompt` input argument.
|
602 |
-
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
603 |
-
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
|
604 |
-
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
|
605 |
-
output_type (`str`, *optional*, defaults to `"pil"`):
|
606 |
-
The output format of the generated image. Choose between `PIL.Image` or `np.array`.
|
607 |
-
return_dict (`bool`, *optional*, defaults to `True`):
|
608 |
-
Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
|
609 |
-
plain tuple.
|
610 |
-
callback (`Callable`, *optional*):
|
611 |
-
A function that calls every `callback_steps` steps during inference. The function is called with the
|
612 |
-
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
|
613 |
-
callback_steps (`int`, *optional*, defaults to 1):
|
614 |
-
The frequency at which the `callback` function is called. If not specified, the callback is called at
|
615 |
-
every step.
|
616 |
-
|
617 |
-
Examples:
|
618 |
-
|
619 |
-
```py
|
620 |
-
>>> import PIL
|
621 |
-
>>> import requests
|
622 |
-
>>> import torch
|
623 |
-
>>> from io import BytesIO
|
624 |
-
|
625 |
-
>>> from diffusers import StableDiffusionInstructPix2PixPipeline
|
626 |
-
|
627 |
-
|
628 |
-
>>> def download_image(url):
|
629 |
-
... response = requests.get(url)
|
630 |
-
... return PIL.Image.open(BytesIO(response.content)).convert("RGB")
|
631 |
-
|
632 |
-
|
633 |
-
>>> img_url = "https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/mountain.png"
|
634 |
-
|
635 |
-
>>> image = download_image(img_url).resize((512, 512))
|
636 |
-
|
637 |
-
>>> pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
|
638 |
-
... "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
|
639 |
-
... )
|
640 |
-
>>> pipe = pipe.to("cuda")
|
641 |
-
|
642 |
-
>>> prompt = "make the mountains snowy"
|
643 |
-
>>> image = pipe(prompt=prompt, image=image).images[0]
|
644 |
-
```
|
645 |
-
|
646 |
-
Returns:
|
647 |
-
[`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
|
648 |
-
If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
|
649 |
-
otherwise a `tuple` is returned where the first element is a list with the generated images and the
|
650 |
-
second element is a list of `bool`s indicating whether the corresponding generated image contains
|
651 |
-
"not-safe-for-work" (nsfw) content.
|
652 |
-
"""
|
653 |
-
# 0. Check inputs
|
654 |
-
self.check_inputs(
|
655 |
-
prompt,
|
656 |
-
callback_steps,
|
657 |
-
negative_prompt,
|
658 |
-
prompt_embeds,
|
659 |
-
negative_prompt_embeds,
|
660 |
-
)
|
661 |
-
|
662 |
-
# 1. Define call parameters
|
663 |
-
if prompt is not None and isinstance(prompt, str):
|
664 |
-
batch_size = 1
|
665 |
-
elif prompt is not None and isinstance(prompt, list):
|
666 |
-
batch_size = len(prompt)
|
667 |
-
else:
|
668 |
-
batch_size = prompt_embeds.shape[0]
|
669 |
-
|
670 |
-
device = self._execution_device
|
671 |
-
do_classifier_free_guidance = (
|
672 |
-
guidance_scale > 1.0 and image_guidance_scale >= 1.0
|
673 |
-
)
|
674 |
-
# check if scheduler is in sigmas space
|
675 |
-
scheduler_is_in_sigma_space = hasattr(self.scheduler, "sigmas")
|
676 |
-
|
677 |
-
# 2. Encode input prompt
|
678 |
-
prompt_embeds = self._encode_prompt(
|
679 |
-
prompt,
|
680 |
-
device,
|
681 |
-
num_images_per_prompt,
|
682 |
-
do_classifier_free_guidance,
|
683 |
-
negative_prompt,
|
684 |
-
prompt_embeds=prompt_embeds,
|
685 |
-
negative_prompt_embeds=negative_prompt_embeds,
|
686 |
-
)
|
687 |
-
|
688 |
-
# 3. Preprocess image
|
689 |
-
# Normalize image to [-1,1]
|
690 |
-
preprocessed_photo = self.image_processor.preprocess(photo)
|
691 |
-
|
692 |
-
# 4. set timesteps
|
693 |
-
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
694 |
-
timesteps = self.scheduler.timesteps
|
695 |
-
|
696 |
-
# 5. Prepare Image latents
|
697 |
-
image_latents = self.prepare_image_latents(
|
698 |
-
preprocessed_photo,
|
699 |
-
batch_size,
|
700 |
-
num_images_per_prompt,
|
701 |
-
prompt_embeds.dtype,
|
702 |
-
device,
|
703 |
-
do_classifier_free_guidance,
|
704 |
-
generator,
|
705 |
-
)
|
706 |
-
image_latents = image_latents * self.vae.config.scaling_factor
|
707 |
-
|
708 |
-
height, width = image_latents.shape[-2:]
|
709 |
-
height = height * self.vae_scale_factor
|
710 |
-
width = width * self.vae_scale_factor
|
711 |
-
|
712 |
-
# 6. Prepare latent variables
|
713 |
-
num_channels_latents = self.unet.config.out_channels
|
714 |
-
latents = self.prepare_latents(
|
715 |
-
batch_size * num_images_per_prompt,
|
716 |
-
num_channels_latents,
|
717 |
-
height,
|
718 |
-
width,
|
719 |
-
prompt_embeds.dtype,
|
720 |
-
device,
|
721 |
-
generator,
|
722 |
-
latents,
|
723 |
-
)
|
724 |
-
|
725 |
-
# 7. Check that shapes of latents and image match the UNet channels
|
726 |
-
num_channels_image = image_latents.shape[1]
|
727 |
-
if num_channels_latents + num_channels_image != self.unet.config.in_channels:
|
728 |
-
raise ValueError(
|
729 |
-
f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
|
730 |
-
f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
|
731 |
-
f" `num_channels_image`: {num_channels_image} "
|
732 |
-
f" = {num_channels_latents+num_channels_image}. Please verify the config of"
|
733 |
-
" `pipeline.unet` or your `image` input."
|
734 |
-
)
|
735 |
-
|
736 |
-
# 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
|
737 |
-
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
|
738 |
-
|
739 |
-
# 9. Denoising loop
|
740 |
-
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
741 |
-
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
742 |
-
for i, t in enumerate(timesteps):
|
743 |
-
# Expand the latents if we are doing classifier free guidance.
|
744 |
-
# The latents are expanded 3 times because for pix2pix the guidance\
|
745 |
-
# is applied for both the text and the input image.
|
746 |
-
latent_model_input = (
|
747 |
-
torch.cat([latents] * 3) if do_classifier_free_guidance else latents
|
748 |
-
)
|
749 |
-
|
750 |
-
# concat latents, image_latents in the channel dimension
|
751 |
-
scaled_latent_model_input = self.scheduler.scale_model_input(
|
752 |
-
latent_model_input, t
|
753 |
-
)
|
754 |
-
scaled_latent_model_input = torch.cat(
|
755 |
-
[scaled_latent_model_input, image_latents], dim=1
|
756 |
-
)
|
757 |
-
|
758 |
-
# predict the noise residual
|
759 |
-
noise_pred = self.unet(
|
760 |
-
scaled_latent_model_input,
|
761 |
-
t,
|
762 |
-
encoder_hidden_states=prompt_embeds,
|
763 |
-
return_dict=False,
|
764 |
-
)[0]
|
765 |
-
|
766 |
-
# perform guidance
|
767 |
-
if do_classifier_free_guidance:
|
768 |
-
(
|
769 |
-
noise_pred_text,
|
770 |
-
noise_pred_image,
|
771 |
-
noise_pred_uncond,
|
772 |
-
) = noise_pred.chunk(3)
|
773 |
-
noise_pred = (
|
774 |
-
noise_pred_uncond
|
775 |
-
+ guidance_scale * (noise_pred_text - noise_pred_image)
|
776 |
-
+ image_guidance_scale * (noise_pred_image - noise_pred_uncond)
|
777 |
-
)
|
778 |
-
|
779 |
-
if do_classifier_free_guidance and guidance_rescale > 0.0:
|
780 |
-
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
781 |
-
noise_pred = rescale_noise_cfg(
|
782 |
-
noise_pred, noise_pred_text, guidance_rescale=guidance_rescale
|
783 |
-
)
|
784 |
-
|
785 |
-
# compute the previous noisy sample x_t -> x_t-1
|
786 |
-
latents = self.scheduler.step(
|
787 |
-
noise_pred, t, latents, **extra_step_kwargs, return_dict=False
|
788 |
-
)[0]
|
789 |
-
|
790 |
-
# call the callback, if provided
|
791 |
-
if i == len(timesteps) - 1 or (
|
792 |
-
(i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
|
793 |
-
):
|
794 |
-
progress_bar.update()
|
795 |
-
if callback is not None and i % callback_steps == 0:
|
796 |
-
callback(i, t, latents)
|
797 |
-
|
798 |
-
aov_latents = latents / self.vae.config.scaling_factor
|
799 |
-
aov = self.vae.decode(aov_latents, return_dict=False)[0]
|
800 |
-
do_denormalize = [True] * aov.shape[0]
|
801 |
-
aov_name = required_aovs[0]
|
802 |
-
if aov_name == "albedo" or aov_name == "irradiance":
|
803 |
-
do_gamma_correction = True
|
804 |
-
else:
|
805 |
-
do_gamma_correction = False
|
806 |
-
|
807 |
-
if aov_name == "roughness" or aov_name == "metallic":
|
808 |
-
aov = aov[:, 0:1].repeat(1, 3, 1, 1)
|
809 |
-
|
810 |
-
aov = self.image_processor.postprocess(
|
811 |
-
aov,
|
812 |
-
output_type=output_type,
|
813 |
-
do_denormalize=do_denormalize,
|
814 |
-
do_gamma_correction=do_gamma_correction,
|
815 |
-
)
|
816 |
-
aovs = [aov]
|
817 |
-
|
818 |
-
# Offload last model to CPU
|
819 |
-
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
820 |
-
self.final_offload_hook.offload()
|
821 |
-
return StableDiffusionAOVPipelineOutput(images=aovs)
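For orientation, the `__call__` above conditions on a photo (encoded through the VAE and concatenated to the latents channel-wise) and decodes a single intrinsic channel selected by `required_aovs`. A minimal usage sketch, assuming `pipe` is an already-loaded instance of this pipeline on CUDA; the prompt wording, file path, and seed below are illustrative and not taken from this repo:

import torch
from PIL import Image

# Assumption: `pipe` is an instance of the AOV pipeline defined in this file,
# loaded from a compatible checkpoint and moved to "cuda" beforehand (hypothetical setup).
photo = Image.open("input_photo.png").convert("RGB")  # placeholder path

with torch.no_grad():
    output = pipe(
        prompt="Albedo (diffuse basecolor)",               # illustrative prompt
        photo=photo,                                       # conditioning image, encoded via the VAE
        required_aovs=["albedo"],                          # first entry drives gamma/channel handling above
        num_inference_steps=50,
        guidance_scale=1.5,                                # CFG is active only when > 1.0 ...
        image_guidance_scale=1.5,                          # ... and image_guidance_scale >= 1.0
        generator=torch.Generator("cuda").manual_seed(0),
    )

# `images` holds one entry: the post-processed image list for required_aovs[0].
albedo = output.images[0][0]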
run.sh
ADDED
@@ -0,0 +1,5 @@
#!/bin/bash
CONDA_ENV=$(head -1 /code/environment.yml | cut -d" " -f2)
eval "$(conda shell.bash hook)"
conda activate $CONDA_ENV
python app.py
settings.py
DELETED
@@ -1,23 +0,0 @@
import os

import numpy as np

DEFAULT_MODEL_ID = os.getenv("DEFAULT_MODEL_ID", "stable-diffusion-v1-5/stable-diffusion-v1-5")

MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "3"))
DEFAULT_NUM_IMAGES = min(MAX_NUM_IMAGES, int(os.getenv("DEFAULT_NUM_IMAGES", "1")))
MAX_IMAGE_RESOLUTION = int(os.getenv("MAX_IMAGE_RESOLUTION", "2048"))
DEFAULT_IMAGE_RESOLUTION = min(MAX_IMAGE_RESOLUTION, int(os.getenv("DEFAULT_IMAGE_RESOLUTION", "1024")))

ALLOW_CHANGING_BASE_MODEL = os.getenv("SPACE_ID") != "hysts/ControlNet-v1-1"
SHOW_DUPLICATE_BUTTON = os.getenv("SHOW_DUPLICATE_BUTTON") == "1"

MAX_SEED = np.iinfo(np.int32).max

# Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

# setup CUDA
# disable the following when deployting to hugging face
# if os.getenv("CUDA_VISIBLE_DEVICES") is None:
#     os.environ["CUDA_VISIBLE_DEVICES"] = "7"
#     os.environ["GRADIO_SERVER_PORT"] = "7864"
text2tex/lib/__init__.py
DELETED
File without changes
text2tex/lib/camera_helper.py
DELETED
@@ -1,231 +0,0 @@
import torch

import numpy as np

from sklearn.metrics.pairwise import cosine_similarity

from pytorch3d.renderer import (
    PerspectiveCameras,
    look_at_view_transform
)

# customized
import sys
sys.path.append(".")

from lib.constants import VIEWPOINTS

# ---------------- UTILS ----------------------

def degree_to_radian(d):
    return d * np.pi / 180

def radian_to_degree(r):
    return 180 * r / np.pi

def xyz_to_polar(xyz):
    """ assume y-axis is the up axis """

    x, y, z = xyz

    theta = 180 * np.arccos(z) / np.pi
    phi = 180 * np.arccos(y) / np.pi

    return theta, phi

def polar_to_xyz(theta, phi, dist):
    """ assume y-axis is the up axis """

    theta = degree_to_radian(theta)
    phi = degree_to_radian(phi)

    x = np.sin(phi) * np.sin(theta) * dist
    y = np.cos(phi) * dist
    z = np.sin(phi) * np.cos(theta) * dist

    return [x, y, z]


# ---------------- VIEWPOINTS ----------------------


def filter_viewpoints(pre_viewpoints: dict, viewpoints: dict):
    """ return the binary mask of viewpoints to be filtered """

    filter_mask = [0 for _ in viewpoints.keys()]
    for i, v in viewpoints.items():
        x_v, y_v, z_v = polar_to_xyz(v["azim"], 90 - v["elev"], v["dist"])

        for _, pv in pre_viewpoints.items():
            x_pv, y_pv, z_pv = polar_to_xyz(pv["azim"], 90 - pv["elev"], pv["dist"])
            sim = cosine_similarity(
                np.array([[x_v, y_v, z_v]]),
                np.array([[x_pv, y_pv, z_pv]])
            )[0, 0]

            if sim > 0.9:
                filter_mask[i] = 1

    return filter_mask


def init_viewpoints(mode, sample_space, init_dist, init_elev, principle_directions,
                    use_principle=True, use_shapenet=False, use_objaverse=False):

    if mode == "predefined":
        (
            dist_list,
            elev_list,
            azim_list,
            sector_list
        ) = init_predefined_viewpoints(sample_space, init_dist, init_elev)
    elif mode == "hemisphere":
        (
            dist_list,
            elev_list,
            azim_list,
            sector_list
        ) = init_hemisphere_viewpoints(sample_space, init_dist)
    else:
        raise NotImplementedError()

    # punishments for views -> in case always selecting the same view
    view_punishments = [1 for _ in range(len(dist_list))]

    if use_principle:
        (
            dist_list,
            elev_list,
            azim_list,
            sector_list,
            view_punishments
        ) = init_principle_viewpoints(
            principle_directions,
            dist_list,
            elev_list,
            azim_list,
            sector_list,
            view_punishments,
            use_shapenet,
            use_objaverse
        )

    return dist_list, elev_list, azim_list, sector_list, view_punishments


def init_principle_viewpoints(
    principle_directions,
    dist_list,
    elev_list,
    azim_list,
    sector_list,
    view_punishments,
    use_shapenet=False,
    use_objaverse=False
):
    if use_shapenet:
        key = "shapenet"

        pre_elev_list = [v for v in VIEWPOINTS[key]["elev"]]
        pre_azim_list = [v for v in VIEWPOINTS[key]["azim"]]
        pre_sector_list = [v for v in VIEWPOINTS[key]["sector"]]

        num_principle = 10
        pre_dist_list = [dist_list[0] for _ in range(num_principle)]
        pre_view_punishments = [0 for _ in range(num_principle)]
    elif use_objaverse:
        key = "objaverse"

        pre_elev_list = [v for v in VIEWPOINTS[key]["elev"]]
        pre_azim_list = [v for v in VIEWPOINTS[key]["azim"]]
        pre_sector_list = [v for v in VIEWPOINTS[key]["sector"]]

        num_principle = 10
        pre_dist_list = [dist_list[0] for _ in range(num_principle)]
        pre_view_punishments = [0 for _ in range(num_principle)]
    else:
        num_principle = 6
        pre_elev_list = [v for v in VIEWPOINTS[num_principle]["elev"]]
        pre_azim_list = [v for v in VIEWPOINTS[num_principle]["azim"]]
        pre_sector_list = [v for v in VIEWPOINTS[num_principle]["sector"]]
        pre_dist_list = [dist_list[0] for _ in range(num_principle)]
        pre_view_punishments = [0 for _ in range(num_principle)]

    dist_list = pre_dist_list + dist_list
    elev_list = pre_elev_list + elev_list
    azim_list = pre_azim_list + azim_list
    sector_list = pre_sector_list + sector_list
    view_punishments = pre_view_punishments + view_punishments

    return dist_list, elev_list, azim_list, sector_list, view_punishments


def init_predefined_viewpoints(sample_space, init_dist, init_elev):

    viewpoints = VIEWPOINTS[sample_space]

    assert sample_space == len(viewpoints["sector"])

    dist_list = [init_dist for _ in range(sample_space)]  # always the same dist
    elev_list = [viewpoints["elev"][i] for i in range(sample_space)]
    azim_list = [viewpoints["azim"][i] for i in range(sample_space)]
    sector_list = [viewpoints["sector"][i] for i in range(sample_space)]

    return dist_list, elev_list, azim_list, sector_list


def init_hemisphere_viewpoints(sample_space, init_dist):
    """
    y is up-axis
    """

    num_points = 2 * sample_space
    ga = np.pi * (3. - np.sqrt(5.))  # golden angle in radians

    flags = []
    elev_list = []  # degree
    azim_list = []  # degree

    for i in range(num_points):
        y = 1 - (i / float(num_points - 1)) * 2  # y goes from 1 to -1

        # only take the north hemisphere
        if y >= 0:
            flags.append(True)
        else:
            flags.append(False)

        theta = ga * i  # golden angle increment

        elev_list.append(radian_to_degree(np.arcsin(y)))
        azim_list.append(radian_to_degree(theta))

        radius = np.sqrt(1 - y * y)  # radius at y
        x = np.cos(theta) * radius
        z = np.sin(theta) * radius

    elev_list = [elev_list[i] for i in range(len(elev_list)) if flags[i]]
    azim_list = [azim_list[i] for i in range(len(azim_list)) if flags[i]]

    dist_list = [init_dist for _ in elev_list]
    sector_list = ["good" for _ in elev_list]  # HACK don't define sector names for now

    return dist_list, elev_list, azim_list, sector_list


# ---------------- CAMERAS ----------------------


def init_camera(dist, elev, azim, image_size, device):
    R, T = look_at_view_transform(dist, elev, azim)
    image_size = torch.tensor([image_size, image_size]).unsqueeze(0)
    cameras = PerspectiveCameras(R=R, T=T, device=device, image_size=image_size)

    return cameras
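The "hemisphere" mode above places viewpoints with a golden-angle (Fibonacci) spiral and keeps only the upper half (y >= 0). A self-contained sketch of the same sampling idea in plain numpy; the function name is mine, not from the repo:

import numpy as np

def fibonacci_hemisphere(n_views, dist=1.0):
    """Golden-angle spiral on the upper (y >= 0) hemisphere, returned as dist/elev/azim in degrees."""
    num_points = 2 * n_views                      # sample a full sphere, keep roughly the top half
    ga = np.pi * (3.0 - np.sqrt(5.0))             # golden angle in radians
    elev, azim = [], []
    for i in range(num_points):
        y = 1 - (i / float(num_points - 1)) * 2   # y goes from 1 to -1
        if y < 0:
            continue                              # discard the southern hemisphere
        elev.append(np.degrees(np.arcsin(y)))     # elevation from the equator
        azim.append(np.degrees(ga * i))           # azimuth grows by the golden angle each step
    dists = [dist] * len(elev)
    return dists, elev, azim

dists, elevs, azims = fibonacci_hemisphere(10, dist=2.5)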
text2tex/lib/constants.py
DELETED
@@ -1,648 +0,0 @@
PALETTE = {
    0: [255, 255, 255],  # white - background
    1: [204, 50, 50],    # red - old
    2: [231, 180, 22],   # yellow - update
    3: [45, 201, 55]     # green - new
}

QUAD_WEIGHTS = {
    0: 0,    # background
    1: 0.1,  # old
    2: 0.5,  # update
    3: 1     # new
}

VIEWPOINTS = {
    1: {
        "azim": [0],
        "elev": [0],
        "sector": ["front"]
    },
    2: {
        "azim": [0, 30],
        "elev": [0, 0],
        "sector": ["front", "front"]
    },
    4: {
        "azim": [45, 315, 135, 225],
        "elev": [0, 0, 0, 0],
        "sector": ["front right", "front left", "back right", "back left"]
    },
    6: {
        "azim": [0, 90, 270, 0, 180, 0],
        "elev": [0, 0, 0, 90, 0, -90],
        "sector": ["front", "right", "left", "top", "back", "bottom"]
    },
    "shapenet": {
        "azim": [270, 315, 225, 0, 180, 45, 135, 90, 270, 270],
        "elev": [15, 15, 15, 15, 15, 15, 15, 15, 90, -90],
        "sector": ["front", "front right", "front left", "right", "left",
                   "back right", "back left", "back", "top", "bottom"]
    },
    "objaverse": {
        "azim": [0, 45, 315, 90, 270, 135, 225, 180, 0, 0],
        "elev": [15, 15, 15, 15, 15, 15, 15, 15, 90, -90],
        "sector": ["front", "front right", "front left", "right", "left",
                   "back right", "back left", "back", "top", "bottom"]
    },
    12: {
        "azim": [45, 315, 135, 225,
                 0, 45, 315, 90, 270, 135, 225, 180],
        "elev": [0, 0, 0, 0,
                 45, 45, 45, 45, 45, 45, 45, 45],
        "sector": ["front right", "front left", "back right", "back left",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back"]
    },
    20: {
        "azim": [45, 315, 135, 225,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 0, 45, 315, 90, 270, 135, 225, 180],
        "elev": [0, 0, 0, 0,
                 30, 30, 30, 30, 30, 30, 30, 30,
                 60, 60, 60, 60, 60, 60, 60, 60],
        "sector": ["front right", "front left", "back right", "back left",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back"]
    },
    36: {
        "azim": [45, 315, 135, 225,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5],
        "elev": [0, 0, 0, 0,
                 30, 30, 30, 30, 30, 30, 30, 30,
                 60, 60, 60, 60, 60, 60, 60, 60,
                 15, 15, 15, 15, 15, 15, 15, 15,
                 45, 45, 45, 45, 45, 45, 45, 45],
        "sector": ["front right", "front left", "back right", "back left",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back",
                   "top front", "top right", "top left", "top right",
                   "top left", "top right", "top left", "top back",
                   "front right", "front left", "front right", "front left",
                   "back right", "back left", "back right", "back left",
                   "front right", "front left", "front right", "front left",
                   "back right", "back left", "back right", "back left"]
    },
    68: {
        "azim": [45, 315, 135, 225,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 0, 45, 315, 90, 270, 135, 225, 180,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5,
                 22.5, 337.5, 67.5, 292.5, 112.5, 247.5, 157.5, 202.5],
        "elev": [0, 0, 0, 0,
                 30, 30, 30, 30, 30, 30, 30, 30,
                 60, 60, 60, 60, 60, 60, 60, 60,
                 15, 15, 15, 15, 15, 15, 15, 15,
                 45, 45, 45, 45, 45, 45, 45, 45,
                 -30, -30, -30, -30, -30, -30, -30, -30,
                 -60, -60, -60, -60, -60, -60, -60, -60,
                 -15, -15, -15, -15, -15, -15, -15, -15,
                 -45, -45, -45, -45, -45, -45, -45, -45],
        "sector": ["front right", "front left", "back right", "back left",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back",
                   "top front", "top right", "top left", "top right",
                   "top left", "top right", "top left", "top back",
                   "front right", "front left", "front right", "front left",
                   "back right", "back left", "back right", "back left",
                   "front right", "front left", "front right", "front left",
                   "back right", "back left", "back right", "back left",
                   "front", "front right", "front left", "right", "left",
                   "back right", "back left", "back",
                   "bottom front", "bottom right", "bottom left", "bottom right",
                   "bottom left", "bottom right", "bottom left", "bottom back",
                   "bottom front right", "bottom front left", "bottom front right", "bottom front left",
                   "bottom back right", "bottom back left", "bottom back right", "bottom back left",
                   "bottom front right", "bottom front left", "bottom front right", "bottom front left",
                   "bottom back right", "bottom back left", "bottom back right", "bottom back left"]
    }
}
text2tex/lib/diffusion_helper.py
DELETED
@@ -1,189 +0,0 @@
import torch

import cv2
import numpy as np

from PIL import Image
from torchvision import transforms

# Stable Diffusion 2
from diffusers import (
    StableDiffusionInpaintPipeline,
    StableDiffusionPipeline,
    EulerDiscreteScheduler
)

# customized
import sys
sys.path.append(".")

from models.ControlNet.gradio_depth2image import init_model, process


def get_controlnet_depth():
    print("=> initializing ControlNet Depth...")
    model, ddim_sampler = init_model()

    return model, ddim_sampler


def get_inpainting(device):
    print("=> initializing Inpainting...")

    model = StableDiffusionInpaintPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-inpainting",
        torch_dtype=torch.float16,
    ).to(device)

    return model

def get_text2image(device):
    print("=> initializing Inpainting...")

    model_id = "stabilityai/stable-diffusion-2"
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
    model = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=torch.float16).to(device)

    return model


@torch.no_grad()
def apply_controlnet_depth(model, ddim_sampler,
    init_image, prompt, strength, ddim_steps,
    generate_mask_image, keep_mask_image, depth_map_np,
    a_prompt, n_prompt, guidance_scale, seed, eta, num_samples,
    device, blend=0, save_memory=False):
    """
    Use Stable Diffusion 2 to generate image

    Arguments:
        args: input arguments
        model: Stable Diffusion 2 model
        init_image_tensor: input image, torch.FloatTensor of shape (1, H, W, 3)
        mask_tensor: depth map of the input image, torch.FloatTensor of shape (1, H, W, 1)
        depth_map_np: depth map of the input image, torch.FloatTensor of shape (1, H, W)
    """

    print("=> generating ControlNet Depth RePaint image...")

    # Stable Diffusion 2 receives PIL.Image
    # NOTE Stable Diffusion 2 returns a PIL.Image object
    # image and mask_image should be PIL images.
    # The mask structure is white for inpainting and black for keeping as is
    diffused_image_np = process(
        model, ddim_sampler,
        np.array(init_image), prompt, a_prompt, n_prompt, num_samples,
        ddim_steps, guidance_scale, seed, eta,
        strength=strength, detected_map=depth_map_np, unknown_mask=np.array(generate_mask_image), save_memory=save_memory
    )[0]

    init_image = init_image.convert("RGB")
    diffused_image = Image.fromarray(diffused_image_np).convert("RGB")

    if blend > 0 and transforms.ToTensor()(keep_mask_image).sum() > 0:
        print("=> blending the generated region...")
        kernel_size = 3
        kernel = np.ones((kernel_size, kernel_size), np.uint8)

        keep_image_np = np.array(init_image).astype(np.uint8)
        keep_image_np_dilate = cv2.dilate(keep_image_np, kernel, iterations=1)

        keep_mask_np = np.array(keep_mask_image).astype(np.uint8)
        keep_mask_np_dilate = cv2.dilate(keep_mask_np, kernel, iterations=1)

        generate_image_np = np.array(diffused_image).astype(np.uint8)

        overlap_mask_np = np.array(generate_mask_image).astype(np.uint8)
        overlap_mask_np *= keep_mask_np_dilate
        print("=> blending {} pixels...".format(np.sum(overlap_mask_np)))

        overlap_keep = keep_image_np_dilate[overlap_mask_np == 1]
        overlap_generate = generate_image_np[overlap_mask_np == 1]

        overlap_np = overlap_keep * blend + overlap_generate * (1 - blend)

        generate_image_np[overlap_mask_np == 1] = overlap_np

        diffused_image = Image.fromarray(generate_image_np.astype(np.uint8)).convert("RGB")

    init_image_masked = init_image
    diffused_image_masked = diffused_image

    return diffused_image, init_image_masked, diffused_image_masked


@torch.no_grad()
def apply_inpainting(model,
    init_image, mask_image_tensor, prompt, height, width, device):
    """
    Use Stable Diffusion 2 to generate image

    Arguments:
        args: input arguments
        model: Stable Diffusion 2 model
        init_image_tensor: input image, torch.FloatTensor of shape (1, H, W, 3)
        mask_tensor: depth map of the input image, torch.FloatTensor of shape (1, H, W, 1)
        depth_map_tensor: depth map of the input image, torch.FloatTensor of shape (1, H, W)
    """

    print("=> generating Inpainting image...")

    mask_image = mask_image_tensor[0].cpu()
    mask_image = mask_image.permute(2, 0, 1)
    mask_image = transforms.ToPILImage()(mask_image).convert("L")

    # NOTE Stable Diffusion 2 returns a PIL.Image object
    # image and mask_image should be PIL images.
    # The mask structure is white for inpainting and black for keeping as is
    diffused_image = model(
        prompt=prompt,
        image=init_image.resize((512, 512)),
        mask_image=mask_image.resize((512, 512)),
        height=512,
        width=512
    ).images[0].resize((height, width))

    return diffused_image


@torch.no_grad()
def apply_inpainting_postprocess(model,
    init_image, mask_image_tensor, prompt, height, width, device):
    """
    Use Stable Diffusion 2 to generate image

    Arguments:
        args: input arguments
        model: Stable Diffusion 2 model
        init_image_tensor: input image, torch.FloatTensor of shape (1, H, W, 3)
        mask_tensor: depth map of the input image, torch.FloatTensor of shape (1, H, W, 1)
        depth_map_tensor: depth map of the input image, torch.FloatTensor of shape (1, H, W)
    """

    print("=> generating Inpainting image...")

    mask_image = mask_image_tensor[0].cpu()
    mask_image = mask_image.permute(2, 0, 1)
    mask_image = transforms.ToPILImage()(mask_image).convert("L")

    # NOTE Stable Diffusion 2 returns a PIL.Image object
    # image and mask_image should be PIL images.
    # The mask structure is white for inpainting and black for keeping as is
    diffused_image = model(
        prompt=prompt,
        image=init_image.resize((512, 512)),
        mask_image=mask_image.resize((512, 512)),
        height=512,
        width=512
    ).images[0].resize((height, width))

    diffused_image_tensor = torch.from_numpy(np.array(diffused_image)).to(device)

    init_images_tensor = torch.from_numpy(np.array(init_image)).to(device)

    init_images_tensor = diffused_image_tensor * mask_image_tensor[0] + init_images_tensor * (1 - mask_image_tensor[0])
    init_image = Image.fromarray(init_images_tensor.cpu().numpy().astype(np.uint8)).convert("RGB")

    return init_image
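Both inpainting helpers above follow the diffusers convention that the mask is a single-channel image in which white pixels are regenerated and black pixels are kept. A minimal sketch of that call against the same stabilityai/stable-diffusion-2-inpainting checkpoint; the file paths and prompt are placeholders:

import torch
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline

# Load the inpainting pipeline once and keep it on GPU.
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-inpainting", torch_dtype=torch.float16
).to("cuda")

init_image = Image.open("render.png").convert("RGB").resize((512, 512))   # placeholder path
mask_image = Image.open("mask.png").convert("L").resize((512, 512))       # white = repaint, black = keep

result = pipe(
    prompt="a weathered leather texture",   # placeholder prompt
    image=init_image,
    mask_image=mask_image,
    height=512,
    width=512,
).images[0]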
text2tex/lib/io_helper.py
DELETED
@@ -1,78 +0,0 @@
# common utils
import os
import json

# numpy
import numpy as np

# visualization
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt

matplotlib.use("Agg")

from pytorch3d.io import save_obj

from torchvision import transforms


def save_depth(fragments, output_dir, init_image, view_idx):
    print("=> saving depth...")
    width, height = init_image.size
    dpi = 100
    figsize = width / float(dpi), height / float(dpi)

    depth_np = fragments.zbuf[0].cpu().numpy()

    fig = plt.figure(figsize=figsize)
    ax = fig.add_axes([0, 0, 1, 1])
    # Hide spines, ticks, etc.
    ax.axis('off')
    # Display the image.
    ax.imshow(depth_np, cmap='gray')

    plt.savefig(os.path.join(output_dir, "{}.png".format(view_idx)), bbox_inches='tight', pad_inches=0)
    np.save(os.path.join(output_dir, "{}.npy".format(view_idx)), depth_np[..., 0])


def save_backproject_obj(output_dir, obj_name,
    verts, faces, verts_uvs, faces_uvs, projected_texture,
    device):
    print("=> saving OBJ file...")
    texture_map = transforms.ToTensor()(projected_texture).to(device)
    texture_map = texture_map.permute(1, 2, 0)
    obj_path = os.path.join(output_dir, obj_name)

    save_obj(
        obj_path,
        verts=verts,
        faces=faces,
        decimal_places=5,
        verts_uvs=verts_uvs,
        faces_uvs=faces_uvs,
        texture_map=texture_map
    )


def save_args(args, output_dir):
    with open(os.path.join(output_dir, "args.json"), "w") as f:
        json.dump(
            {k: v for k, v in vars(args).items()},
            f,
            indent=4
        )


def save_viewpoints(args, output_dir, dist_list, elev_list, azim_list, view_list):
    with open(os.path.join(output_dir, "viewpoints.json"), "w") as f:
        json.dump(
            {
                "dist": dist_list,
                "elev": elev_list,
                "azim": azim_list,
                "view": view_list
            },
            f,
            indent=4
        )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
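save_args above is simply vars() applied to the argument namespace and dumped as JSON; a self-contained sketch of that pattern, with made-up argument names purely for illustration:

import argparse
import json

args = argparse.Namespace(obj_path="mesh.obj", uv_size=1024, num_views=36)  # hypothetical arguments
print(json.dumps(vars(args), indent=4))   # same structure as the args.json written by save_args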
text2tex/lib/mesh_helper.py
DELETED
@@ -1,148 +0,0 @@
import os
import torch
import trimesh
import xatlas

import numpy as np

from sklearn.decomposition import PCA

from torchvision import transforms

from tqdm import tqdm

from pytorch3d.io import (
    load_obj,
    load_objs_as_meshes
)


def compute_principle_directions(model_path, num_points=20000):
    mesh = trimesh.load_mesh(model_path, force="mesh")
    pc, _ = trimesh.sample.sample_surface_even(mesh, num_points)

    pc -= np.mean(pc, axis=0, keepdims=True)

    principle_directions = PCA(n_components=3).fit(pc).components_

    return principle_directions


def init_mesh(input_path, cache_path, device):
    print("=> parameterizing target mesh...")

    mesh = trimesh.load_mesh(input_path, force='mesh')
    try:
        vertices, faces = mesh.vertices, mesh.faces
    except AttributeError:
        print("multiple materials in {} are not supported".format(input_path))
        exit()

    vmapping, indices, uvs = xatlas.parametrize(vertices, faces)
    xatlas.export(str(cache_path), vertices[vmapping], indices, uvs)

    print("=> loading target mesh...")

    # principle_directions = compute_principle_directions(cache_path)
    principle_directions = None

    _, faces, aux = load_obj(cache_path, device=device)
    mesh = load_objs_as_meshes([cache_path], device=device)

    num_verts = mesh.verts_packed().shape[0]

    # make sure mesh center is at origin
    bbox = mesh.get_bounding_boxes()
    mesh_center = bbox.mean(dim=2).repeat(num_verts, 1)
    mesh = apply_offsets_to_mesh(mesh, -mesh_center)

    # make sure mesh size is normalized
    box_size = bbox[..., 1] - bbox[..., 0]
    box_max = box_size.max(dim=1, keepdim=True)[0].repeat(num_verts, 3)
    mesh = apply_scale_to_mesh(mesh, 1 / box_max)

    return mesh, mesh.verts_packed(), faces, aux, principle_directions, mesh_center, box_max


def apply_offsets_to_mesh(mesh, offsets):
    new_mesh = mesh.offset_verts(offsets)

    return new_mesh

def apply_scale_to_mesh(mesh, scale):
    new_mesh = mesh.scale_verts(scale)

    return new_mesh


def adjust_uv_map(faces, aux, init_texture, uv_size):
    """
    adjust UV map to be compatible with multiple textures.
    UVs for different materials will be decomposed and placed horizontally

        +-----+-----+-----+--
        |  1  |  2  |  3  |
        +-----+-----+-----+--

    """

    textures_ids = faces.textures_idx
    materials_idx = faces.materials_idx
    verts_uvs = aux.verts_uvs

    num_materials = torch.unique(materials_idx).shape[0]

    new_verts_uvs = verts_uvs.clone()
    for material_id in range(num_materials):
        # apply offsets to horizontal axis
        faces_ids = textures_ids[materials_idx == material_id].unique()
        new_verts_uvs[faces_ids, 0] += material_id

    new_verts_uvs[:, 0] /= num_materials

    init_texture_tensor = transforms.ToTensor()(init_texture)
    init_texture_tensor = torch.cat([init_texture_tensor for _ in range(num_materials)], dim=-1)
    init_texture = transforms.ToPILImage()(init_texture_tensor).resize((uv_size, uv_size))

    return new_verts_uvs, init_texture


@torch.no_grad()
def update_face_angles(mesh, cameras, fragments):
    def get_angle(x, y):
        x = torch.nn.functional.normalize(x)
        y = torch.nn.functional.normalize(y)
        inner_product = (x * y).sum(dim=1)
        x_norm = x.pow(2).sum(dim=1).pow(0.5)
        y_norm = y.pow(2).sum(dim=1).pow(0.5)
        cos = inner_product / (x_norm * y_norm)
        angle = torch.acos(cos)
        angle = angle * 180 / 3.14159

        return angle

    # face normals
    face_normals = mesh.faces_normals_padded()[0]

    # view vector (object center -> camera center)
    camera_center = cameras.get_camera_center()

    face_angles = get_angle(
        face_normals,
        camera_center.repeat(face_normals.shape[0], 1)
    )  # (F)

    face_angles_rev = get_angle(
        face_normals,
        -camera_center.repeat(face_normals.shape[0], 1)
    )  # (F)

    face_angles = torch.minimum(face_angles, face_angles_rev)

    # Indices of unique visible faces
    visible_map = fragments.pix_to_face.unique()  # (num_visible_faces)
    invisible_mask = torch.ones_like(face_angles)
    invisible_mask[visible_map] = 0
    face_angles[invisible_mask == 1] = 10000.  # angles of invisible faces are ignored

    return face_angles
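The normalization inside init_mesh centers the mesh at the origin and divides by the longest bounding-box edge. A NumPy sketch of the same idea on a dummy vertex array (values invented for illustration):

import numpy as np

verts = np.array([[0.0, 0.0, 0.0], [2.0, 1.0, 0.5], [4.0, 2.0, 1.0]])
bbox_min, bbox_max = verts.min(axis=0), verts.max(axis=0)
center = (bbox_min + bbox_max) / 2        # midpoint of the bounding box
scale = (bbox_max - bbox_min).max()       # longest bounding-box edge
verts_normalized = (verts - center) / scale
print(verts_normalized)                   # now fits inside a unit cube around the origin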
text2tex/lib/projection_helper.py
DELETED
@@ -1,464 +0,0 @@
import os
import torch

import cv2
import random

import numpy as np

from torchvision import transforms

from pytorch3d.renderer import TexturesUV
from pytorch3d.ops import interpolate_face_attributes

from PIL import Image

from tqdm import tqdm

# customized
import sys
sys.path.append(".")

from lib.camera_helper import init_camera
from lib.render_helper import init_renderer, render
from lib.shading_helper import (
    BlendParams,
    init_soft_phong_shader,
    init_flat_texel_shader,
)
from lib.vis_helper import visualize_outputs, visualize_quad_mask
from lib.constants import *


def get_all_4_locations(values_y, values_x):
    y_0 = torch.floor(values_y)
    y_1 = torch.ceil(values_y)
    x_0 = torch.floor(values_x)
    x_1 = torch.ceil(values_x)

    return torch.cat([y_0, y_0, y_1, y_1], 0).long(), torch.cat([x_0, x_1, x_0, x_1], 0).long()


def compose_quad_mask(new_mask_image, update_mask_image, old_mask_image, device):
    """
    compose quad mask:
        -> 0: background
        -> 1: old
        -> 2: update
        -> 3: new
    """

    new_mask_tensor = transforms.ToTensor()(new_mask_image).to(device)
    update_mask_tensor = transforms.ToTensor()(update_mask_image).to(device)
    old_mask_tensor = transforms.ToTensor()(old_mask_image).to(device)

    all_mask_tensor = new_mask_tensor + update_mask_tensor + old_mask_tensor

    quad_mask_tensor = torch.zeros_like(all_mask_tensor)
    quad_mask_tensor[old_mask_tensor == 1] = 1
    quad_mask_tensor[update_mask_tensor == 1] = 2
    quad_mask_tensor[new_mask_tensor == 1] = 3

    return old_mask_tensor, update_mask_tensor, new_mask_tensor, all_mask_tensor, quad_mask_tensor


def compute_view_heat(similarity_tensor, quad_mask_tensor):
    num_total_pixels = quad_mask_tensor.reshape(-1).shape[0]
    heat = 0
    for idx in QUAD_WEIGHTS:
        heat += (quad_mask_tensor == idx).sum() * QUAD_WEIGHTS[idx] / num_total_pixels

    return heat


def select_viewpoint(selected_view_ids, view_punishments,
    mode, dist_list, elev_list, azim_list, sector_list, view_idx,
    similarity_texture_cache, exist_texture,
    mesh, faces, verts_uvs,
    image_size, faces_per_pixel,
    init_image_dir, mask_image_dir, normal_map_dir, depth_map_dir, similarity_map_dir,
    device, use_principle=False
):
    if mode == "sequential":

        num_views = len(dist_list)

        dist = dist_list[view_idx % num_views]
        elev = elev_list[view_idx % num_views]
        azim = azim_list[view_idx % num_views]
        sector = sector_list[view_idx % num_views]

        selected_view_ids.append(view_idx % num_views)

    elif mode == "heuristic":

        if use_principle and view_idx < 6:

            selected_view_idx = view_idx

        else:

            selected_view_idx = None
            max_heat = 0

            print("=> selecting next view...")
            view_heat_list = []
            for sample_idx in tqdm(range(len(dist_list))):

                view_heat, *_ = render_one_view_and_build_masks(dist_list[sample_idx], elev_list[sample_idx], azim_list[sample_idx],
                    sample_idx, sample_idx, view_punishments,
                    similarity_texture_cache, exist_texture,
                    mesh, faces, verts_uvs,
                    image_size, faces_per_pixel,
                    init_image_dir, mask_image_dir, normal_map_dir, depth_map_dir, similarity_map_dir,
                    device)

                if view_heat > max_heat:
                    selected_view_idx = sample_idx
                    max_heat = view_heat

                view_heat_list.append(view_heat.item())

            print(view_heat_list)
            print("select view {} with heat {}".format(selected_view_idx, max_heat))

        dist = dist_list[selected_view_idx]
        elev = elev_list[selected_view_idx]
        azim = azim_list[selected_view_idx]
        sector = sector_list[selected_view_idx]

        selected_view_ids.append(selected_view_idx)

        view_punishments[selected_view_idx] *= 0.01

    elif mode == "random":

        selected_view_idx = random.choice(range(len(dist_list)))

        dist = dist_list[selected_view_idx]
        elev = elev_list[selected_view_idx]
        azim = azim_list[selected_view_idx]
        sector = sector_list[selected_view_idx]

        selected_view_ids.append(selected_view_idx)

    else:
        raise NotImplementedError()

    return dist, elev, azim, sector, selected_view_ids, view_punishments


@torch.no_grad()
def build_backproject_mask(mesh, faces, verts_uvs,
    cameras, reference_image, faces_per_pixel,
    image_size, uv_size, device):
    # construct pixel UVs
    renderer_scaled = init_renderer(cameras,
        shader=init_soft_phong_shader(
            camera=cameras,
            blend_params=BlendParams(),
            device=device),
        image_size=image_size,
        faces_per_pixel=faces_per_pixel
    )
    fragments_scaled = renderer_scaled.rasterizer(mesh)

    # get UV coordinates for each pixel
    faces_verts_uvs = verts_uvs[faces.textures_idx]

    pixel_uvs = interpolate_face_attributes(
        fragments_scaled.pix_to_face, fragments_scaled.bary_coords, faces_verts_uvs
    )  # NxHsxWsxKx2
    pixel_uvs = pixel_uvs.permute(0, 3, 1, 2, 4).reshape(-1, 2)

    texture_locations_y, texture_locations_x = get_all_4_locations(
        (1 - pixel_uvs[:, 1]).reshape(-1) * (uv_size - 1),
        pixel_uvs[:, 0].reshape(-1) * (uv_size - 1)
    )

    K = faces_per_pixel

    texture_values = torch.from_numpy(np.array(reference_image.resize((image_size, image_size)))).float() / 255.
    texture_values = texture_values.to(device).unsqueeze(0).expand([4, -1, -1, -1]).unsqueeze(0).expand([K, -1, -1, -1, -1])

    # texture
    texture_tensor = torch.zeros(uv_size, uv_size, 3).to(device)
    texture_tensor[texture_locations_y, texture_locations_x, :] = texture_values.reshape(-1, 3)

    return texture_tensor[:, :, 0]


@torch.no_grad()
def build_diffusion_mask(mesh_stuff,
    renderer, exist_texture, similarity_texture_cache, target_value, device, image_size,
    smooth_mask=False, view_threshold=0.01):

    mesh, faces, verts_uvs = mesh_stuff
    mask_mesh = mesh.clone()  # NOTE in-place operation - DANGER!!!

    # visible mask => the whole region
    exist_texture_expand = exist_texture.unsqueeze(0).unsqueeze(-1).expand(-1, -1, -1, 3).to(device)
    mask_mesh.textures = TexturesUV(
        maps=torch.ones_like(exist_texture_expand),
        faces_uvs=faces.textures_idx[None, ...],
        verts_uvs=verts_uvs[None, ...],
        sampling_mode="nearest"
    )
    # visible_mask_tensor, *_ = render(mask_mesh, renderer)
    visible_mask_tensor, _, similarity_map_tensor, *_ = render(mask_mesh, renderer)
    # faces that are too rotated away from the viewpoint will be treated as invisible
    valid_mask_tensor = (similarity_map_tensor >= view_threshold).float()
    visible_mask_tensor *= valid_mask_tensor

    # nonexist mask <=> new mask
    exist_texture_expand = exist_texture.unsqueeze(0).unsqueeze(-1).expand(-1, -1, -1, 3).to(device)
    mask_mesh.textures = TexturesUV(
        maps=1 - exist_texture_expand,
        faces_uvs=faces.textures_idx[None, ...],
        verts_uvs=verts_uvs[None, ...],
        sampling_mode="nearest"
    )
    new_mask_tensor, *_ = render(mask_mesh, renderer)
    new_mask_tensor *= valid_mask_tensor

    # exist mask => visible mask - new mask
    exist_mask_tensor = visible_mask_tensor - new_mask_tensor
    exist_mask_tensor[exist_mask_tensor < 0] = 0  # NOTE dilate can lead to overflow

    # all update mask
    mask_mesh.textures = TexturesUV(
        maps=(
            similarity_texture_cache.argmax(0) == target_value
            # # only consider the views that have already appeared before
            # similarity_texture_cache[0:target_value+1].argmax(0) == target_value
        ).float().unsqueeze(0).unsqueeze(-1).expand(-1, -1, -1, 3).to(device),
        faces_uvs=faces.textures_idx[None, ...],
        verts_uvs=verts_uvs[None, ...],
        sampling_mode="nearest"
    )
    all_update_mask_tensor, *_ = render(mask_mesh, renderer)

    # current update mask => intersection between all update mask and exist mask
    update_mask_tensor = exist_mask_tensor * all_update_mask_tensor

    # keep mask => exist mask - update mask
    old_mask_tensor = exist_mask_tensor - update_mask_tensor

    # convert
    new_mask = new_mask_tensor[0].cpu().float().permute(2, 0, 1)
    new_mask = transforms.ToPILImage()(new_mask).convert("L")

    update_mask = update_mask_tensor[0].cpu().float().permute(2, 0, 1)
    update_mask = transforms.ToPILImage()(update_mask).convert("L")

    old_mask = old_mask_tensor[0].cpu().float().permute(2, 0, 1)
    old_mask = transforms.ToPILImage()(old_mask).convert("L")

    exist_mask = exist_mask_tensor[0].cpu().float().permute(2, 0, 1)
    exist_mask = transforms.ToPILImage()(exist_mask).convert("L")

    return new_mask, update_mask, old_mask, exist_mask


@torch.no_grad()
def render_one_view(mesh,
    dist, elev, azim,
    image_size, faces_per_pixel,
    device):

    # render the view
    cameras = init_camera(
        dist, elev, azim,
        image_size, device
    )
    renderer = init_renderer(cameras,
        shader=init_soft_phong_shader(
            camera=cameras,
            blend_params=BlendParams(),
            device=device),
        image_size=image_size,
        faces_per_pixel=faces_per_pixel
    )

    init_images_tensor, normal_maps_tensor, similarity_tensor, depth_maps_tensor, fragments = render(mesh, renderer)

    return (
        cameras, renderer,
        init_images_tensor, normal_maps_tensor, similarity_tensor, depth_maps_tensor, fragments
    )


@torch.no_grad()
def build_similarity_texture_cache_for_all_views(mesh, faces, verts_uvs,
    dist_list, elev_list, azim_list,
    image_size, image_size_scaled, uv_size, faces_per_pixel,
    device):

    num_candidate_views = len(dist_list)
    similarity_texture_cache = torch.zeros(num_candidate_views, uv_size, uv_size).to(device)

    print("=> building similarity texture cache for all views...")
    for i in tqdm(range(num_candidate_views)):
        cameras, _, _, _, similarity_tensor, _, _ = render_one_view(mesh,
            dist_list[i], elev_list[i], azim_list[i],
            image_size, faces_per_pixel, device)

        similarity_texture_cache[i] = build_backproject_mask(mesh, faces, verts_uvs,
            cameras, transforms.ToPILImage()(similarity_tensor[0, :, :, 0]).convert("RGB"), faces_per_pixel,
            image_size_scaled, uv_size, device)

    return similarity_texture_cache


@torch.no_grad()
def render_one_view_and_build_masks(dist, elev, azim,
    selected_view_idx, view_idx, view_punishments,
    similarity_texture_cache, exist_texture,
    mesh, faces, verts_uvs,
    image_size, faces_per_pixel,
    init_image_dir, mask_image_dir, normal_map_dir, depth_map_dir, similarity_map_dir,
    device, save_intermediate=False, smooth_mask=False, view_threshold=0.01):

    # render the view
    (
        cameras, renderer,
        init_images_tensor, normal_maps_tensor, similarity_tensor, depth_maps_tensor, fragments
    ) = render_one_view(mesh,
        dist, elev, azim,
        image_size, faces_per_pixel,
        device
    )

    init_image = init_images_tensor[0].cpu()
    init_image = init_image.permute(2, 0, 1)
    init_image = transforms.ToPILImage()(init_image).convert("RGB")

    normal_map = normal_maps_tensor[0].cpu()
    normal_map = normal_map.permute(2, 0, 1)
    normal_map = transforms.ToPILImage()(normal_map).convert("RGB")

    depth_map = depth_maps_tensor[0].cpu().numpy()
    depth_map = Image.fromarray(depth_map).convert("L")

    similarity_map = similarity_tensor[0, :, :, 0].cpu()
    similarity_map = transforms.ToPILImage()(similarity_map).convert("L")


    flat_renderer = init_renderer(cameras,
        shader=init_flat_texel_shader(
            camera=cameras,
            device=device),
        image_size=image_size,
        faces_per_pixel=faces_per_pixel
    )
    new_mask_image, update_mask_image, old_mask_image, exist_mask_image = build_diffusion_mask(
        (mesh, faces, verts_uvs),
        flat_renderer, exist_texture, similarity_texture_cache, selected_view_idx, device, image_size,
        smooth_mask=smooth_mask, view_threshold=view_threshold
    )
    # NOTE the view idx is the absolute idx in the sample space (i.e. `selected_view_idx`)
    # it should match with `similarity_texture_cache`

    (
        old_mask_tensor,
        update_mask_tensor,
        new_mask_tensor,
        all_mask_tensor,
        quad_mask_tensor
    ) = compose_quad_mask(new_mask_image, update_mask_image, old_mask_image, device)

    view_heat = compute_view_heat(similarity_tensor, quad_mask_tensor)
    view_heat *= view_punishments[selected_view_idx]

    # save intermediate results
    if save_intermediate:
        init_image.save(os.path.join(init_image_dir, "{}.png".format(view_idx)))
        normal_map.save(os.path.join(normal_map_dir, "{}.png".format(view_idx)))
        depth_map.save(os.path.join(depth_map_dir, "{}.png".format(view_idx)))
        similarity_map.save(os.path.join(similarity_map_dir, "{}.png".format(view_idx)))

        new_mask_image.save(os.path.join(mask_image_dir, "{}_new.png".format(view_idx)))
        update_mask_image.save(os.path.join(mask_image_dir, "{}_update.png".format(view_idx)))
        old_mask_image.save(os.path.join(mask_image_dir, "{}_old.png".format(view_idx)))
        exist_mask_image.save(os.path.join(mask_image_dir, "{}_exist.png".format(view_idx)))

        visualize_quad_mask(mask_image_dir, quad_mask_tensor, view_idx, view_heat, device)

    return (
        view_heat,
        renderer, cameras, fragments,
        init_image, normal_map, depth_map,
        init_images_tensor, normal_maps_tensor, depth_maps_tensor, similarity_tensor,
        old_mask_image, update_mask_image, new_mask_image,
        old_mask_tensor, update_mask_tensor, new_mask_tensor, all_mask_tensor, quad_mask_tensor
    )


@torch.no_grad()
def backproject_from_image(mesh, faces, verts_uvs, cameras,
    reference_image, new_mask_image, update_mask_image,
    init_texture, exist_texture,
    image_size, uv_size, faces_per_pixel,
    device):

    # construct pixel UVs
    renderer_scaled = init_renderer(cameras,
        shader=init_soft_phong_shader(
            camera=cameras,
            blend_params=BlendParams(),
            device=device),
        image_size=image_size,
        faces_per_pixel=faces_per_pixel
    )
    fragments_scaled = renderer_scaled.rasterizer(mesh)

    # get UV coordinates for each pixel
    faces_verts_uvs = verts_uvs[faces.textures_idx]

    pixel_uvs = interpolate_face_attributes(
        fragments_scaled.pix_to_face, fragments_scaled.bary_coords, faces_verts_uvs
    )  # NxHsxWsxKx2
    pixel_uvs = pixel_uvs.permute(0, 3, 1, 2, 4).reshape(pixel_uvs.shape[-2], pixel_uvs.shape[1], pixel_uvs.shape[2], 2)

    # the update mask has to be on top of the diffusion mask
    new_mask_image_tensor = transforms.ToTensor()(new_mask_image).to(device).unsqueeze(-1)
    update_mask_image_tensor = transforms.ToTensor()(update_mask_image).to(device).unsqueeze(-1)

    project_mask_image_tensor = torch.logical_or(update_mask_image_tensor, new_mask_image_tensor).float()
    project_mask_image = project_mask_image_tensor * 255.
    project_mask_image = Image.fromarray(project_mask_image[0, :, :, 0].cpu().numpy().astype(np.uint8))

    project_mask_image_scaled = project_mask_image.resize(
        (image_size, image_size),
        Image.Resampling.NEAREST
    )
    project_mask_image_tensor_scaled = transforms.ToTensor()(project_mask_image_scaled).to(device)

    pixel_uvs_masked = pixel_uvs[project_mask_image_tensor_scaled == 1]

    texture_locations_y, texture_locations_x = get_all_4_locations(
        (1 - pixel_uvs_masked[:, 1]).reshape(-1) * (uv_size - 1),
        pixel_uvs_masked[:, 0].reshape(-1) * (uv_size - 1)
    )

    K = pixel_uvs.shape[0]
    project_mask_image_tensor_scaled = project_mask_image_tensor_scaled[:, None, :, :, None].repeat(1, 4, 1, 1, 3)

    texture_values = torch.from_numpy(np.array(reference_image.resize((image_size, image_size))))
    texture_values = texture_values.to(device).unsqueeze(0).expand([4, -1, -1, -1]).unsqueeze(0).expand([K, -1, -1, -1, -1])

    texture_values_masked = texture_values.reshape(-1, 3)[project_mask_image_tensor_scaled.reshape(-1, 3) == 1].reshape(-1, 3)

    # texture
    texture_tensor = torch.from_numpy(np.array(init_texture)).to(device)
    texture_tensor[texture_locations_y, texture_locations_x, :] = texture_values_masked

    init_texture = Image.fromarray(texture_tensor.cpu().numpy().astype(np.uint8))

    # update texture cache
    exist_texture[texture_locations_y, texture_locations_x] = 1

    return init_texture, project_mask_image, exist_texture
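get_all_4_locations above splats each continuous UV sample onto the four surrounding texels (floor/ceil on both axes), which is what lets the back-projection fill the texture without pinholes. A standalone sketch of that behavior with invented coordinates:

import torch

values_y = torch.tensor([3.2, 7.0])
values_x = torch.tensor([5.8, 1.5])

y0, y1 = torch.floor(values_y), torch.ceil(values_y)
x0, x1 = torch.floor(values_x), torch.ceil(values_x)
ys = torch.cat([y0, y0, y1, y1], 0).long()
xs = torch.cat([x0, x1, x0, x1], 0).long()
print(list(zip(ys.tolist(), xs.tolist())))
# [(3, 5), (7, 1), (3, 6), (7, 2), (4, 5), (7, 1), (4, 6), (7, 2)]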
text2tex/lib/render_helper.py
DELETED
@@ -1,108 +0,0 @@
import os
import torch

import cv2

import numpy as np

from PIL import Image

from torchvision import transforms
from pytorch3d.ops import interpolate_face_attributes
from pytorch3d.renderer import (
    RasterizationSettings,
    MeshRendererWithFragments,
    MeshRasterizer,
)

# customized
import sys
sys.path.append(".")


def init_renderer(camera, shader, image_size, faces_per_pixel):
    raster_settings = RasterizationSettings(image_size=image_size, faces_per_pixel=faces_per_pixel)
    renderer = MeshRendererWithFragments(
        rasterizer=MeshRasterizer(
            cameras=camera,
            raster_settings=raster_settings
        ),
        shader=shader
    )

    return renderer


@torch.no_grad()
def render(mesh, renderer, pad_value=10):
    def phong_normal_shading(meshes, fragments) -> torch.Tensor:
        faces = meshes.faces_packed()  # (F, 3)
        vertex_normals = meshes.verts_normals_packed()  # (V, 3)
        faces_normals = vertex_normals[faces]
        pixel_normals = interpolate_face_attributes(
            fragments.pix_to_face, fragments.bary_coords, faces_normals
        )

        return pixel_normals

    def similarity_shading(meshes, fragments):
        faces = meshes.faces_packed()  # (F, 3)
        vertex_normals = meshes.verts_normals_packed()  # (V, 3)
        faces_normals = vertex_normals[faces]
        vertices = meshes.verts_packed()  # (V, 3)
        face_positions = vertices[faces]
        view_directions = torch.nn.functional.normalize((renderer.shader.cameras.get_camera_center().reshape(1, 1, 3) - face_positions), p=2, dim=2)
        cosine_similarity = torch.nn.CosineSimilarity(dim=2)(faces_normals, view_directions)
        pixel_similarity = interpolate_face_attributes(
            fragments.pix_to_face, fragments.bary_coords, cosine_similarity.unsqueeze(-1)
        )

        return pixel_similarity

    def get_relative_depth_map(fragments, pad_value=pad_value):
        absolute_depth = fragments.zbuf[..., 0]  # B, H, W
        no_depth = -1

        depth_min, depth_max = absolute_depth[absolute_depth != no_depth].min(), absolute_depth[absolute_depth != no_depth].max()
        target_min, target_max = 50, 255

        depth_value = absolute_depth[absolute_depth != no_depth]
        depth_value = depth_max - depth_value  # reverse values

        depth_value /= (depth_max - depth_min)
        depth_value = depth_value * (target_max - target_min) + target_min

        relative_depth = absolute_depth.clone()
        relative_depth[absolute_depth != no_depth] = depth_value
        relative_depth[absolute_depth == no_depth] = pad_value  # not completely black

        return relative_depth


    images, fragments = renderer(mesh)
    normal_maps = phong_normal_shading(mesh, fragments).squeeze(-2)
    similarity_maps = similarity_shading(mesh, fragments).squeeze(-2)  # -1 - 1
    depth_maps = get_relative_depth_map(fragments)

    # normalize similarity mask to 0 - 1
    similarity_maps = torch.abs(similarity_maps)  # 0 - 1

    # HACK erode, eliminate isolated dots
    non_zero_similarity = (similarity_maps > 0).float()
    non_zero_similarity = (non_zero_similarity * 255.).cpu().numpy().astype(np.uint8)[0]
    non_zero_similarity = cv2.erode(non_zero_similarity, kernel=np.ones((3, 3), np.uint8), iterations=2)
    non_zero_similarity = torch.from_numpy(non_zero_similarity).to(similarity_maps.device).unsqueeze(0) / 255.
    similarity_maps = non_zero_similarity.unsqueeze(-1) * similarity_maps

    return images, normal_maps, similarity_maps, depth_maps, fragments


@torch.no_grad()
def check_visible_faces(mesh, fragments):
    pix_to_face = fragments.pix_to_face

    # Indices of unique visible faces
    visible_map = pix_to_face.unique()  # (num_visible_faces)

    return visible_map
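get_relative_depth_map in the file above flips valid depths (nearer surfaces become brighter) and rescales them into [50, 255], while empty pixels get pad_value so the background is not pure black. A tiny sketch of that mapping on a hand-made z-buffer:

import torch

zbuf = torch.tensor([[1.0, 2.0, -1.0]])    # -1 marks pixels with no geometry
valid = zbuf != -1
depth = zbuf[valid]
depth_min, depth_max = depth.min(), depth.max()
depth = depth_max - depth                  # reverse: nearer surfaces get larger values
depth = depth / (depth_max - depth_min)
depth = depth * (255 - 50) + 50

relative = zbuf.clone()
relative[valid] = depth
relative[~valid] = 10                      # default pad_value, keeps background visible
print(relative)                            # tensor([[255.,  50.,  10.]])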
text2tex/lib/shading_helper.py
DELETED
@@ -1,45 +0,0 @@
from typing import NamedTuple, Sequence

from pytorch3d.renderer.mesh.shader import ShaderBase
from pytorch3d.renderer import (
    AmbientLights,
    SoftPhongShader
)


class BlendParams(NamedTuple):
    sigma: float = 1e-4
    gamma: float = 1e-4
    background_color: Sequence = (1, 1, 1)


class FlatTexelShader(ShaderBase):

    def __init__(self, device="cpu", cameras=None, lights=None, materials=None, blend_params=None):
        super().__init__(device, cameras, lights, materials, blend_params)

    def forward(self, fragments, meshes, **_kwargs):
        texels = meshes.sample_textures(fragments)
        texels[(fragments.pix_to_face == -1), :] = 0
        return texels.squeeze(-2)


def init_soft_phong_shader(camera, blend_params, device):
    lights = AmbientLights(device=device)
    shader = SoftPhongShader(
        cameras=camera,
        lights=lights,
        device=device,
        blend_params=blend_params
    )

    return shader


def init_flat_texel_shader(camera, device):
    shader = FlatTexelShader(
        cameras=camera,
        device=device
    )

    return shader
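BlendParams above is a plain NamedTuple, so call sites can override individual blend settings (for example the background color) without touching the other defaults. A standalone snippet that re-declares the tuple so it runs without pytorch3d installed:

from typing import NamedTuple, Sequence

class BlendParams(NamedTuple):
    sigma: float = 1e-4
    gamma: float = 1e-4
    background_color: Sequence = (1, 1, 1)

print(BlendParams())                                  # all defaults: white background
print(BlendParams(background_color=(0, 0, 0)))        # black background, sigma/gamma unchanged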