Spaces:
Running
on
Zero
Running
on
Zero
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT
# except for the third-party components listed below.
# Hunyuan 3D does not impose any additional limitations beyond what is outlined
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party
# components and must ensure that the usage of the third party components adheres to
# all relevant laws and regulations.
# For avoidance of doubts, Hunyuan 3D means the large language models and
# their software and algorithms, including trained model weights, parameters (including
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code,
# fine-tuning enabling code and other elements of the foregoing made publicly available
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT.
import os | |
import torch | |
import random | |
import numpy as np | |
from PIL import Image | |
from typing import List | |
import huggingface_hub | |
from omegaconf import OmegaConf | |
from diffusers import DiffusionPipeline | |
from diffusers import EulerAncestralDiscreteScheduler, DDIMScheduler, UniPCMultistepScheduler | |
class multiviewDiffusionNet:
    """Callable wrapper around a custom multiview diffusion pipeline.

    On construction the pipeline weights are fetched from the Hugging Face
    Hub, the scheduler is replaced with a ``UniPCMultistepScheduler`` and the
    pipeline is moved to the configured device. Calling the instance runs one
    generation pass and returns the generated views grouped in a dict.
    """

    # Number of denoising steps to run, keyed by the scheduler's class name.
    _INFER_STEPS = {
        "EulerAncestralDiscreteScheduler": 30,
        "UniPCMultistepScheduler": 15,
        "DDIMScheduler": 50,
        "ShiftSNRScheduler": 15,
    }

    def __init__(self, config) -> None:
        """Load the pipeline (and the optional DINO encoder) described by ``config``.

        Args:
            config: Object exposing ``device``, ``multiview_cfg_path``,
                ``custom_pipeline`` and ``multiview_pretrained_path``.
                ``dino_ckpt_path`` is read only when the pipeline's UNet has
                a truthy ``use_dino`` attribute.
        """
        self.device = config.device

        cfg_path = config.multiview_cfg_path
        custom_pipeline = config.custom_pipeline
        cfg = OmegaConf.load(cfg_path)
        self.cfg = cfg
        # The mode is the configured pipeline name with its first two
        # characters dropped; forward_one() later checks it for "pbr".
        self.mode = self.cfg.model.params.stable_diffusion_config.custom_pipeline[2:]

        # Only the paint-PBR sub-folder of the repository is downloaded.
        snapshot_dir = huggingface_hub.snapshot_download(
            repo_id=config.multiview_pretrained_path,
            allow_patterns=["hunyuan3d-paintpbr-v2-1/*"],
        )
        model_path = os.path.join(snapshot_dir, "hunyuan3d-paintpbr-v2-1")

        pipeline = DiffusionPipeline.from_pretrained(
            model_path,
            custom_pipeline=custom_pipeline,
            torch_dtype=torch.float16,
        )
        pipeline.scheduler = UniPCMultistepScheduler.from_config(
            pipeline.scheduler.config, timestep_spacing="trailing"
        )
        pipeline.set_progress_bar_config(disable=True)
        # NOTE(review): eval() is presumably provided by the custom pipeline
        # class; confirm against the pipeline implementation.
        pipeline.eval()
        pipeline.view_size = cfg.model.params.get("view_size", 320)
        self.pipeline = pipeline.to(self.device)

        if getattr(self.pipeline.unet, "use_dino", False):
            # Imported lazily so the module is only required when DINO
            # conditioning is actually enabled.
            from hunyuanpaintpbr.unet.modules import Dino_v2

            self.dino_v2 = Dino_v2(config.dino_ckpt_path).to(torch.float16)
            self.dino_v2 = self.dino_v2.to(self.device)

    def seed_everything(self, seed):
        """Seed Python's ``random``, NumPy and PyTorch, and export ``PL_GLOBAL_SEED``."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        os.environ["PL_GLOBAL_SEED"] = str(seed)

    def __call__(self, images, conditions, prompt=None, custom_view_size=None, resize_input=False):
        """Run :meth:`forward_one` with the given arguments and return its result."""
        return self.forward_one(
            images,
            conditions,
            prompt=prompt,
            custom_view_size=custom_view_size,
            resize_input=resize_input,
        )

    @staticmethod
    def _prepare_control_image(image, size):
        """Return ``image`` resized to ``size`` x ``size``; mode "L" images are binarised."""
        image = image.resize((size, size))
        if image.mode == "L":
            # Values above 1 become 255, everything else 0, stored as mode "1".
            image = image.point(lambda x: 255 if x > 1 else 0, mode="1")
        return image

    def forward_one(self, input_images, control_images, prompt=None, custom_view_size=None, resize_input=False):
        """Generate multiview images for one set of reference and control images.

        Args:
            input_images: A single image or a list/tuple of images exposing a
                PIL-style ``resize``. Only the first one is passed to the
                pipeline (and to the DINO encoder when enabled).
            control_images: Sequence of images exposing PIL-style ``resize``,
                ``mode`` and ``point``. The first half is forwarded as
                ``images_normal`` and the second half as ``images_position``;
                with an odd count the last image is not used. The sequence
                itself is not modified.
            prompt: Forwarded unchanged to the pipeline.
            custom_view_size: Edge length used for the control images and the
                output width/height. Defaults to ``self.pipeline.view_size``.
            resize_input: When true the input images are resized to
                ``custom_view_size``; otherwise to ``self.pipeline.view_size``.

        Returns:
            ``{"albedo": ..., "mr": ...}`` when ``"pbr"`` is in ``self.mode``
            (the first ``num_view`` pipeline images and the remaining ones),
            otherwise ``{"hdr": ...}`` holding all pipeline images.

        Raises:
            KeyError: If no step count is configured for the active scheduler.
        """
        # Seeds are fixed so that repeated calls are reproducible.
        self.seed_everything(0)

        if custom_view_size is None:
            custom_view_size = self.pipeline.view_size

        if not isinstance(input_images, (list, tuple)):
            input_images = [input_images]
        input_size = custom_view_size if resize_input else self.pipeline.view_size
        input_images = [image.resize((input_size, input_size)) for image in input_images]

        # Build a new list rather than assigning back into the argument, so
        # the caller's sequence keeps its original images.
        control_images = [
            self._prepare_control_image(image, custom_view_size) for image in control_images
        ]

        num_view = len(control_images) // 2
        kwargs = {
            "generator": torch.Generator(device=self.pipeline.device).manual_seed(0),
            "width": custom_view_size,
            "height": custom_view_size,
            "num_in_batch": num_view,
            # Each group is wrapped in an outer list, as in the original call.
            "images_normal": [control_images[:num_view]],
            "images_position": [control_images[num_view:2 * num_view]],
        }

        if getattr(self.pipeline.unet, "use_dino", False):
            kwargs["dino_hidden_states"] = self.dino_v2(input_images[0])

        scheduler_name = type(self.pipeline.scheduler).__name__
        try:
            num_inference_steps = self._INFER_STEPS[scheduler_name]
        except KeyError:
            raise KeyError(
                f"No inference step count configured for scheduler {scheduler_name!r}; "
                f"known schedulers: {sorted(self._INFER_STEPS)}"
            ) from None

        mvd_image = self.pipeline(
            input_images[0:1],
            num_inference_steps=num_inference_steps,
            prompt=prompt,
            sync_condition=None,
            guidance_scale=3.0,
            **kwargs,
        ).images

        if "pbr" in self.mode:
            return {"albedo": mvd_image[:num_view], "mr": mvd_image[num_view:]}
        return {"hdr": mvd_image}