#
# Toyota Motor Europe NV/SA and its affiliated companies retain all intellectual
# property and proprietary rights in and to this software and related documentation.
# Any commercial use, reproduction, disclosure or distribution of this software and
# related documentation without an express license agreement from Toyota Motor Europe NV/SA
# is strictly prohibited.
#

from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Literal, Tuple
import tyro
import importlib
from vhap.util.log import get_logger


logger = get_logger(__name__)


def import_module(module_name: str):
    module_name, class_name = module_name.rsplit(".", 1)
    module = getattr(importlib.import_module(module_name), class_name)
    return module


class Config:
    def __getitem__(self, __name: str):
        if hasattr(self, __name):
            return getattr(self, __name)
        else:
            raise AttributeError(f"{self.__class__.__name__} has no attribute '{__name}'")


@dataclass()
class DataConfig(Config):
    root_folder: Path = ''
    """The root folder for the dataset."""
    sequence: str = ''
    """The sequence name"""
    _target: str = "vhap.data.video_dataset.VideoDataset"
    """The target dataset class"""
    division: Optional[str] = None
    subset: Optional[str] = None
    calibrated: bool = False
    """Whether the camera parameters are available"""
    align_cameras_to_axes: bool = True
    """Adjust how the cameras are distributed in space with a global rotation"""
    camera_convention_conversion: str = 'opencv->opengl'
    target_extrinsic_type: Literal['w2c', 'c2w'] = 'w2c'
    n_downsample_rgb: Optional[int] = None
    """Load downsampled RGB images to save data IO time"""
    scale_factor: float = 1.0
    """Further apply a scaling transformation after the downsampling of RGB"""
    background_color: Optional[Literal['white', 'black']] = 'white'
    use_alpha_map: bool = False
    use_landmark: bool = True
    landmark_source: Optional[Literal['face-alignment', 'star']] = "star"


@dataclass()
class ModelConfig(Config):
    n_shape: int = 300
    n_expr: int = 100
    n_tex: int = 100
    use_static_offset: bool = False
    """Optimize static offsets on top of FLAME vertices in the canonical space"""
    use_dynamic_offset: bool = False
    """Optimize dynamic offsets on top of the FLAME vertices in the canonical space"""
    add_teeth: bool = True
    """Add teeth to the FLAME model"""
    remove_lip_inside: bool = False
    """Remove the inner part of the lips from the FLAME model"""
    tex_resolution: int = 2048
    """The resolution of the extra texture map"""
    tex_painted: bool = True
    """Use a painted texture map instead of the PCA texture space as the base texture map"""
    tex_extra: bool = True
    """Optimize an extra texture map as the base texture map or the residual texture map"""
    # tex_clusters: tuple[str, ...] = ("skin", "hair", "sclerae", "lips_tight", "boundary")
    tex_clusters: tuple[str, ...] = ("skin", "hair", "boundary", "lips_tight", "teeth", "sclerae", "irises")
    """Regions whose interiors are supposed to share a similar color"""
    residual_tex: bool = True
    """Use the extra texture map as a residual component on top of the base texture"""
    occluded: tuple[str, ...] = ()  # to be used for updating stage configs in __post_init__
    """The regions that are occluded by hair or garments"""
    flame_params_path: Optional[Path] = None
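

# Illustrative sketch (not used by this module at import time): how `DataConfig._target`
# is meant to be resolved into a dataset class via the `import_module` helper above.
# The constructor call is an assumption about the dataset's signature, not taken from this file.
def _example_load_dataset(cfg: DataConfig):
    dataset_cls = import_module(cfg._target)  # e.g. resolves to vhap.data.video_dataset.VideoDataset
    return dataset_cls(cfg=cfg)  # hypothetical constructor signature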


@dataclass()
class RenderConfig(Config):
    backend: Literal['nvdiffrast', 'pytorch3d'] = 'nvdiffrast'
    """The rendering backend"""
    use_opengl: bool = False
    """Use OpenGL for NVDiffRast"""
    background_train: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for training"""
    disturb_rate_fg: Optional[float] = 0.5
    """The rate of disturbance for the foreground"""
    disturb_rate_bg: Optional[float] = 0.5
    """The rate of disturbance for the background. 0.6 works best for multi-view, 0.3 for single-view"""
    background_eval: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for evaluation"""
    lighting_type: Literal['constant', 'front', 'front-range', 'SH'] = 'SH'
    """The type of lighting"""
    lighting_space: Literal['world', 'camera'] = 'world'
    """The space of lighting"""


@dataclass()
class LearningRateConfig(Config):
    base: float = 5e-3
    """shape, texture, rotation, eyes, neck, jaw"""
    translation: float = 1e-3
    expr: float = 5e-2
    static_offset: float = 5e-4
    dynamic_offset: float = 5e-4
    camera: float = 5e-3
    light: float = 5e-3


@dataclass()
class LossWeightConfig(Config):
    landmark: Optional[float] = 10.
    always_enable_jawline_landmarks: bool = True
    """Always enable the landmark loss for the jawline landmarks. Ignores disable_jawline_landmarks in stages."""
    photo: Optional[float] = 30.
    reg_shape: float = 3e-1
    reg_expr: float = 3e-2
    reg_tex_pca: float = 1e-4  # makes it hard to model hair color when too high
    reg_tex_res: Optional[float] = None  # 1e2 (when w/o reg_var)
    """Regularize the residual texture map"""
    reg_tex_res_clusters: Optional[float] = 1e1
    """Regularize the residual texture map inside each texture cluster"""
    reg_tex_res_for: tuple[str, ...] = ("sclerae", "teeth")
    """Regularize the residual texture map for the clusters specified"""
    reg_tex_tv: Optional[float] = 1e4  # important to split regions apart
    """Regularize the total variation of the texture map"""
    reg_light: Optional[float] = None
    """Regularize lighting parameters"""
    reg_diffuse: Optional[float] = 1e2
    """Regularize lighting parameters by the diffuse term"""
    reg_offset: Optional[float] = 3e2
    """Regularize the norm of offsets"""
    reg_offset_relax_coef: float = 1.
    """The coefficient for relaxing reg_offset for the regions specified"""
    reg_offset_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""
    reg_offset_lap: Optional[float] = 1e6
    """Regularize the difference in Laplacian coordinates caused by offsets"""
    reg_offset_lap_relax_coef: float = 0.1
    """The coefficient for relaxing reg_offset_lap for the regions specified"""
    reg_offset_lap_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""
    reg_offset_rigid: Optional[float] = 3e2
    """Regularize the offsets to be as-rigid-as-possible"""
    reg_offset_rigid_for: tuple[str, ...] = ("left_ear", "right_ear", "neck", "left_eye", "right_eye", "lips_tight")
    """Regularize the offsets to be as-rigid-as-possible for the regions specified"""
    reg_offset_dynamic: Optional[float] = 3e5
    """Regularize the dynamic offsets to be temporally smooth"""
    blur_iter: int = 0
    """The number of iterations for blurring vertex weights"""
    smooth_trans: float = 3e2
    """global translation"""
    smooth_rot: float = 3e1
    """global rotation"""
    smooth_neck: float = 3e1
    """neck joint"""
    smooth_jaw: float = 1e-1
    """jaw joint"""
    smooth_eyes: float = 0
    """eyes joints"""
    prior_neck: float = 3e-1
    """Regularize the neck joint towards neutral"""
    prior_jaw: float = 3e-1
    """Regularize the jaw joint towards neutral"""
    prior_eyes: float = 3e-2
    """Regularize the eyes joints towards neutral"""
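

# Note (illustrative): `Config.__getitem__` lets any of these configs be indexed by field
# name, e.g. `LossWeightConfig()["reg_offset"]` returns 3e2, equivalent to attribute access.
# Weights typed `Optional[float]` can be set to `None`, which presumably disables the
# corresponding loss term in the tracker.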
= ("left_ear", "right_ear", "neck", "left_eye", "right_eye", "lips_tight") """Regularize the the offsets to be as-rigid-as-possible for the regions specified""" reg_offset_dynamic: Optional[float] = 3e5 """Regularize the dynamic offsets to be temporally smooth""" blur_iter: int = 0 """The number of iterations for blurring vertex weights""" smooth_trans: float = 3e2 """global translation""" smooth_rot: float = 3e1 """global rotation""" smooth_neck: float = 3e1 """neck joint""" smooth_jaw: float = 1e-1 """jaw joint""" smooth_eyes: float = 0 """eyes joints""" prior_neck: float = 3e-1 """Regularize the neck joint towards neutral""" prior_jaw: float = 3e-1 """Regularize the jaw joint towards neutral""" prior_eyes: float = 3e-2 """Regularize the eyes joints towards neutral""" @dataclass() class LogConfig(Config): interval_scalar: Optional[int] = 100 """The step interval of scalar logging. Using an interval of stage_tracking.num_steps // 5 unless specified.""" interval_media: Optional[int] = 500 """The step interval of media logging. Using an interval of stage_tracking.num_steps unless specified.""" image_format: Literal['jpg', 'png'] = 'jpg' """Output image format""" view_indices: Tuple[int, ...] = () """Manually specify the view indices for log""" max_num_views: int = 3 """The maximum number of views for log""" stack_views_in_rows: bool = True @dataclass() class ExperimentConfig(Config): output_folder: Path = Path('output/track') reuse_landmarks: bool = True keyframes: Tuple[int, ...] = tuple() photometric: bool = False """enable photometric optimization, otherwise only landmark optimization""" @dataclass() class StageConfig(Config): disable_jawline_landmarks: bool = False """Disable the landmark loss for the jawline landmarks since they are not accurate""" @dataclass() class StageLmkInitRigidConfig(StageConfig): """The stage for initializing the rigid parameters""" num_steps: int = 300 optimizable_params: tuple[str, ...] = ("cam", "pose") @dataclass() class StageLmkInitAllConfig(StageConfig): """The stage for initializing all the parameters optimizable with landmark loss""" num_steps: int = 300 optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr") @dataclass() class StageLmkSequentialTrackingConfig(StageConfig): """The stage for sequential tracking with landmark loss""" num_steps: int = 50 optimizable_params: tuple[str, ...] = ("pose", "joints", "expr") @dataclass() class StageLmkGlobalTrackingConfig(StageConfig): """The stage for global tracking with landmark loss""" num_epochs: int = 0 optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr") @dataclass() class PhotometricStageConfig(StageConfig): align_texture_except: tuple[str, ...] = () """Align the inner region of rendered FLAME to the image, except for the regions specified""" align_boundary_except: tuple[str, ...] = ("bottomline",) # necessary to avoid the bottomline of FLAME from being stretched to the bottom of the image """Align the boundary of FLAME to the image, except for the regions specified""" @dataclass() class StageRgbInitTextureConfig(PhotometricStageConfig): """The stage for initializing the texture map with photometric loss""" num_steps: int = 500 optimizable_params: tuple[str, ...] = ("cam", "shape", "texture", "lights") align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck") align_boundary_except: tuple[str, ...] 
= ("hair", "boundary") @dataclass() class StageRgbInitAllConfig(PhotometricStageConfig): """The stage for initializing all the parameters except the offsets with photometric loss""" num_steps: int = 500 optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights") disable_jawline_landmarks: bool = True align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck") align_boundary_except: tuple[str, ...] = ("hair", "bottomline") @dataclass() class StageRgbInitOffsetConfig(PhotometricStageConfig): """The stage for initializing the offsets with photometric loss""" num_steps: int = 500 optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset") disable_jawline_landmarks: bool = True align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck") @dataclass() class StageRgbSequentialTrackingConfig(PhotometricStageConfig): """The stage for sequential tracking with photometric loss""" num_steps: int = 50 optimizable_params: tuple[str, ...] = ("pose", "joints", "expr", "texture", "dynamic_offset") disable_jawline_landmarks: bool = True @dataclass() class StageRgbGlobalTrackingConfig(PhotometricStageConfig): """The stage for global tracking with photometric loss""" num_epochs: int = 30 optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset", "dynamic_offset") disable_jawline_landmarks: bool = True @dataclass() class PipelineConfig(Config): lmk_init_rigid: StageLmkInitRigidConfig lmk_init_all: StageLmkInitAllConfig lmk_sequential_tracking: StageLmkSequentialTrackingConfig lmk_global_tracking: StageLmkGlobalTrackingConfig rgb_init_texture: StageRgbInitTextureConfig rgb_init_all: StageRgbInitAllConfig rgb_init_offset: StageRgbInitOffsetConfig rgb_sequential_tracking: StageRgbSequentialTrackingConfig rgb_global_tracking: StageRgbGlobalTrackingConfig @dataclass() class BaseTrackingConfig(Config): data: DataConfig model: ModelConfig render: RenderConfig log: LogConfig exp: ExperimentConfig lr: LearningRateConfig w: LossWeightConfig pipeline: PipelineConfig begin_stage: Optional[str] = None """Begin from the specified stage for debugging""" begin_frame_idx: int = 0 """Begin from the specified frame index for debugging""" async_func: bool = True """Allow asynchronous function calls for speed up""" device: Literal['cuda', 'cpu'] = 'cuda' def get_occluded(self): occluded_table = { } if self.data.sequence in occluded_table: logger.info(f"Automatically setting cfg.model.occluded to {occluded_table[self.data.sequence]}") self.model.occluded = occluded_table[self.data.sequence] def __post_init__(self): self.get_occluded() if not self.model.use_static_offset and not self.model.use_dynamic_offset: self.model.occluded = tuple(list(self.model.occluded) + ['hair']) # disable boundary alignment for the hair region if no offset is used for cfg_stage in self.pipeline.__dict__.values(): if isinstance(cfg_stage, PhotometricStageConfig): cfg_stage.align_texture_except = tuple(list(cfg_stage.align_texture_except) + list(self.model.occluded)) cfg_stage.align_boundary_except = tuple(list(cfg_stage.align_boundary_except) + list(self.model.occluded)) if self.begin_stage is not None: skip = True for cfg_stage in self.pipeline.__dict__.values(): if cfg_stage.__class__.__name__.lower() == self.begin_stage: skip = False if skip: cfg_stage.num_steps = 0 if __name__ == "__main__": config = tyro.cli(BaseTrackingConfig) print(tyro.to_yaml(config))