modify misc
- aegis.py +4 -4
- ar_model.py +5 -5
- blocklist.py +3 -3
- face_blur_filter.py +2 -2
- inference_utils.py +3 -3
- misc.py +102 -96
- model_t2w.py +2 -2
- model_v2w.py +1 -1
- text2world.py +1 -1
- text2world_hf.py +1 -1
- video2world.py +1 -1
- video_content_safety_filter.py +2 -2
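
Across these files the commit replaces the module import `from . import misc` with `from .misc import misc, Color, timer`, then drops the `misc.` prefix at each call site. The diff uses `Color.green`/`Color.red` for log strings and `timer` both as a context manager (`with timer(...)`) and as a decorator (`@timer(...)` in model_t2w.py below), but their definitions are not part of this diff. A minimal hypothetical sketch consistent with that usage, assuming `termcolor` for coloring and `contextlib.ContextDecorator` so one object serves both roles:

import time
from contextlib import ContextDecorator

import termcolor


class Color:
    """Hypothetical sketch: the real Color lives in misc.py and is not shown here."""

    @staticmethod
    def green(text: str) -> str:
        return termcolor.colored(text, "green")

    @staticmethod
    def red(text: str) -> str:
        return termcolor.colored(text, "red")


class timer(ContextDecorator):
    """Hypothetical sketch: usable as `with timer("msg"):` or `@timer("msg")`."""

    def __init__(self, message: str):
        self.message = message

    def __enter__(self):
        self._start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        print(f"{self.message}: {time.perf_counter() - self._start:.3f}s")
        return False
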
aegis.py
CHANGED
@@ -22,10 +22,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from .categories import UNSAFE_CATEGORIES
 from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner
-from . import misc
+from .misc import misc, Color, timer
 
-SAFE = misc.Color.green("SAFE")
-UNSAFE = misc.Color.red("UNSAFE")
+SAFE = Color.green("SAFE")
+UNSAFE = Color.red("UNSAFE")
 
 DEFAULT_CHECKPOINT_DIR = "checkpoints/Cosmos-1.0-Guardrail/aegis"
 
@@ -120,7 +120,7 @@ def parse_args():
 def main(args):
     aegis = Aegis(checkpoint_dir=args.checkpoint_dir)
     runner = GuardrailRunner(safety_models=[aegis])
-    with misc.timer("aegis safety check"):
+    with timer("aegis safety check"):
         safety, message = runner.run_safety_check(args.prompt)
     log.info(f"Input is: {'SAFE' if safety else 'UNSAFE'}")
     log.info(f"Message: {message}") if not safety else None

ar_model.py
CHANGED
@@ -36,7 +36,7 @@ from .checkpoint import (
     substrings_to_ignore,
 )
 from .sampling import decode_n_tokens, decode_one_token, prefill
-from . import misc
+from .misc import misc, Color, timer
 
 
 class AutoRegressiveModel(torch.nn.Module):
@@ -96,7 +96,7 @@ class AutoRegressiveModel(torch.nn.Module):
         """
         model_config = self.config
         ckpt_path = model_config.ckpt_path
-        with misc.timer(f"loading checkpoint from {ckpt_path}"):
+        with timer(f"loading checkpoint from {ckpt_path}"):
             if ckpt_path.endswith("safetensors"):
                 # Load with safetensors API
                 checkpoint = load_file(ckpt_path, device="cpu")
@@ -142,7 +142,7 @@ class AutoRegressiveModel(torch.nn.Module):
         )
         # Remove the "model." prefix in the state_dict
         llm_checkpoint = process_state_dict(llm_checkpoint, prefix_to_remove="model.")
-        with misc.timer("loading state_dict into model"):
+        with timer("loading state_dict into model"):
            missing_keys, _ = model.load_state_dict(llm_checkpoint, strict=True)
        # Remove keys with "_extra_state" suffix in missing_keys (defined by TransformerEngine for FP8 usage)
        missing_keys = [k for k in missing_keys if not k.endswith("_extra_state")]
@@ -217,7 +217,7 @@ class AutoRegressiveModel(torch.nn.Module):
             # Override the default model configuration with the parameters from the checkpoint
             setattr(model_config, key, value)
 
-        with misc.timer(f"loading checkpoint from {ckpt_path}"):
+        with timer(f"loading checkpoint from {ckpt_path}"):
             if ckpt_path.endswith("safetensors"):
                 # Load with safetensors API
                 checkpoint = load_file(ckpt_path, device="cpu")
@@ -293,7 +293,7 @@ class AutoRegressiveModel(torch.nn.Module):
 
         # Remove the "model." prefix in the state_dict
         llm_checkpoint = process_state_dict(llm_checkpoint, prefix_to_remove="model.")
-        with misc.timer("loading state_dict into model"):
+        with timer("loading state_dict into model"):
            missing_keys, unexpected_keys = model.load_state_dict(llm_checkpoint, strict=True)
        # Remove keys with "_extra_state" suffix in missing_keys (defined by TransformerEngine for FP8 usage)
        missing_keys = [k for k in missing_keys if not k.endswith("_extra_state")]

blocklist.py
CHANGED
@@ -25,10 +25,10 @@ from better_profanity import profanity
 
 from .guardrail_blocklist_utils import read_keyword_list_from_dir, to_ascii
 from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner
-from . import misc
+from .misc import misc, Color, timer
 
 DEFAULT_CHECKPOINT_DIR = "checkpoints/Cosmos-1.0-Guardrail/blocklist"
-CENSOR = misc.Color.red("*")
+CENSOR = Color.red("*")
 
 
 class Blocklist(ContentSafetyGuardrail):
@@ -208,7 +208,7 @@ def parse_args():
 def main(args):
     blocklist = Blocklist(checkpoint_dir=args.checkpoint_dir)
     runner = GuardrailRunner(safety_models=[blocklist])
-    with misc.timer("blocklist safety check"):
+    with timer("blocklist safety check"):
         safety, message = runner.run_safety_check(args.prompt)
     log.info(f"Input is: {'SAFE' if safety else 'UNSAFE'}")
     log.info(f"Message: {message}") if not safety else None

face_blur_filter.py
CHANGED
@@ -29,7 +29,7 @@ from .guardrail_core import GuardrailRunner, PostprocessingGuardrail
 from .guardrail_io_utils import get_video_filepaths, read_video, save_video
 from .blur_utils import pixelate_face
 from .retinaface_utils import decode_batch, filter_detected_boxes, load_model
-from . import misc
+from .misc import misc, Color, timer
 
 DEFAULT_RETINAFACE_CHECKPOINT = "checkpoints/Cosmos-1.0-Guardrail/face_blur_filter/Resnet50_Final.pth"
 
@@ -212,7 +212,7 @@ def main(args):
 
     for filepath in tqdm(filepaths):
         video_data = read_video(filepath)
-        with misc.timer("face blur filter"):
+        with timer("face blur filter"):
             frames = postprocessing_runner.postprocess(video_data.frames)
 
         output_path = os.path.join(args.output_dir, os.path.basename(filepath))

inference_utils.py
CHANGED
@@ -28,7 +28,7 @@ from .model_t2w import DiffusionT2WModel
 from .model_v2w import DiffusionV2WModel
 from .config_helper import get_config_module, override
 from .utils_io import load_from_fileobj
-from .misc import arch_invariant_rand
+from .misc import misc
 
 TORCH_VERSION: Tuple[int, ...] = tuple(int(x) for x in torch.__version__.split(".")[:2])
 if TORCH_VERSION >= (1, 11):
@@ -418,7 +418,7 @@ def generate_world_from_text(
     3. Decodes latents to pixel space
     """
     x_sigma_max = (
-        arch_invariant_rand(
+        misc.arch_invariant_rand(
             (1,) + tuple(state_shape),
             torch.float32,
             model.tensor_kwargs["device"],
@@ -484,7 +484,7 @@ def generate_world_from_video(
     num_of_latent_condition = compute_num_latent_frames(model, num_input_frames)
 
     x_sigma_max = (
-        arch_invariant_rand(
+        misc.arch_invariant_rand(
             (1,) + tuple(state_shape),
             torch.float32,
             model.tensor_kwargs["device"],

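
Both call sites above draw the initial noise `x_sigma_max` through `misc.arch_invariant_rand` rather than `torch.randn`. As the misc.py hunk below shows, the noise is generated by NumPy's `RandomState` on the CPU and only then moved to the target device, so a fixed seed reproduces the same tensor on any GPU architecture. A standalone sketch of that property (the function body is copied from the hunk below; the assertion is illustrative):

import numpy as np
import torch


def arch_invariant_rand(shape, dtype, device, seed=None):
    # CPU-side NumPy generator: output is independent of the GPU architecture
    rng = np.random.RandomState(seed)
    random_array = rng.standard_normal(shape).astype(np.float32)
    return torch.from_numpy(random_array).to(dtype=dtype, device=device)


a = arch_invariant_rand((1, 4), torch.float32, "cpu", seed=0)
b = arch_invariant_rand((1, 4), torch.float32, "cpu", seed=0)
assert torch.equal(a, b)  # same seed, same noise, on every device
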
misc.py
CHANGED
@@ -29,109 +29,115 @@ import numpy as np
 import termcolor
 import torch
 
-from . import distributed
+from .distributed import get_rank
 
 
-def to(
-    data: Any,
-    device: str | torch.device | None = None,
-    dtype: torch.dtype | None = None,
-    memory_format: torch.memory_format = torch.preserve_format,
-) -> Any:
-    """Recursively cast data into the specified device, dtype, and/or memory_format.
-
-    The input data can be a tensor, a list of tensors, a dict of tensors.
-    See the documentation for torch.Tensor.to() for details.
-
-    Args:
-        data (Any): Input data.
-        device (str | torch.device): GPU device (default: None).
-        dtype (torch.dtype): data type (default: None).
-        memory_format (torch.memory_format): memory organization format (default: torch.preserve_format).
-
-    Returns:
-        data (Any): Data cast to the specified device, dtype, and/or memory_format.
-    """
-    assert (
-        device is not None or dtype is not None or memory_format is not None
-    ), "at least one of device, dtype, memory_format should be specified"
-    if isinstance(data, torch.Tensor):
-        is_cpu = (isinstance(device, str) and device == "cpu") or (
-            isinstance(device, torch.device) and device.type == "cpu"
-        )
-        data = data.to(
-            device=device,
-            dtype=dtype,
-            memory_format=memory_format,
-            non_blocking=(not is_cpu),
-        )
-        return data
-    elif isinstance(data, collections.abc.Mapping):
-        return type(data)({key: to(data[key], device=device, dtype=dtype, memory_format=memory_format) for key in data})
-    elif isinstance(data, collections.abc.Sequence) and not isinstance(data, (str, bytes)):
-        return type(data)([to(elem, device=device, dtype=dtype, memory_format=memory_format) for elem in data])
-    else:
-        return data
-
-
-def serialize(data: Any) -> Any:
-    """Serialize data by hierarchically traversing through iterables.
-
-    Args:
-        data (Any): Input data.
-
-    Returns:
-        data (Any): Serialized data.
-    """
-    if isinstance(data, collections.abc.Mapping):
-        return type(data)({key: serialize(data[key]) for key in data})
-    elif isinstance(data, collections.abc.Sequence) and not isinstance(data, (str, bytes)):
-        return type(data)([serialize(elem) for elem in data])
-    else:
-        try:
-            json.dumps(data)
-        except TypeError:
-            data = str(data)
-        return data
-
-
-def set_random_seed(seed: int, by_rank: bool = False) -> None:
-    """Set random seed. This includes random, numpy, Pytorch.
-
-    Args:
-        seed (int): Random seed.
-        by_rank (bool): if true, each GPU will use a different random seed.
-    """
-    if by_rank:
-        seed += distributed.get_rank()
-    log.info(f"Using random seed {seed}.")
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)  # sets seed on the current CPU & all GPUs
-
-
-def arch_invariant_rand(
-    shape: List[int] | Tuple[int], dtype: torch.dtype, device: str | torch.device, seed: int | None = None
-):
-    """Produce a GPU-architecture-invariant randomized Torch tensor.
-
-    Args:
-        shape (list or tuple of ints): Output tensor shape.
-        dtype (torch.dtype): Output tensor type.
-        device (torch.device): Device holding the output.
-        seed (int): Optional randomization seed.
-
-    Returns:
-        tensor (torch.tensor): Randomly-generated tensor.
-    """
-    # Create a random number generator, optionally seeded
-    rng = np.random.RandomState(seed)
-
-    # Generate random numbers using the generator
-    random_array = rng.standard_normal(shape).astype(np.float32)  # Use standard_normal for normal distribution
-
-    # Convert to torch tensor and return
-    return torch.from_numpy(random_array).to(dtype=dtype, device=device)
+class misc():
+
+    @staticmethod
+    def to(
+        data: Any,
+        device: str | torch.device | None = None,
+        dtype: torch.dtype | None = None,
+        memory_format: torch.memory_format = torch.preserve_format,
+    ) -> Any:
+        """Recursively cast data into the specified device, dtype, and/or memory_format.
+
+        The input data can be a tensor, a list of tensors, a dict of tensors.
+        See the documentation for torch.Tensor.to() for details.
+
+        Args:
+            data (Any): Input data.
+            device (str | torch.device): GPU device (default: None).
+            dtype (torch.dtype): data type (default: None).
+            memory_format (torch.memory_format): memory organization format (default: torch.preserve_format).
+
+        Returns:
+            data (Any): Data cast to the specified device, dtype, and/or memory_format.
+        """
+        assert (
+            device is not None or dtype is not None or memory_format is not None
+        ), "at least one of device, dtype, memory_format should be specified"
+        if isinstance(data, torch.Tensor):
+            is_cpu = (isinstance(device, str) and device == "cpu") or (
+                isinstance(device, torch.device) and device.type == "cpu"
+            )
+            data = data.to(
+                device=device,
+                dtype=dtype,
+                memory_format=memory_format,
+                non_blocking=(not is_cpu),
+            )
+            return data
+        elif isinstance(data, collections.abc.Mapping):
+            return type(data)({key: to(data[key], device=device, dtype=dtype, memory_format=memory_format) for key in data})
+        elif isinstance(data, collections.abc.Sequence) and not isinstance(data, (str, bytes)):
+            return type(data)([to(elem, device=device, dtype=dtype, memory_format=memory_format) for elem in data])
+        else:
+            return data
+
+    @staticmethod
+    def serialize(data: Any) -> Any:
+        """Serialize data by hierarchically traversing through iterables.
+
+        Args:
+            data (Any): Input data.
+
+        Returns:
+            data (Any): Serialized data.
+        """
+        if isinstance(data, collections.abc.Mapping):
+            return type(data)({key: serialize(data[key]) for key in data})
+        elif isinstance(data, collections.abc.Sequence) and not isinstance(data, (str, bytes)):
+            return type(data)([serialize(elem) for elem in data])
+        else:
+            try:
+                json.dumps(data)
+            except TypeError:
+                data = str(data)
+            return data
+
+    @staticmethod
+    def set_random_seed(seed: int, by_rank: bool = False) -> None:
+        """Set random seed. This includes random, numpy, Pytorch.
+
+        Args:
+            seed (int): Random seed.
+            by_rank (bool): if true, each GPU will use a different random seed.
+        """
+        if by_rank:
+            seed += get_rank()
+        log.info(f"Using random seed {seed}.")
+        random.seed(seed)
+        np.random.seed(seed)
+        torch.manual_seed(seed)  # sets seed on the current CPU & all GPUs
+
+    @staticmethod
+    def arch_invariant_rand(
+        shape: List[int] | Tuple[int], dtype: torch.dtype, device: str | torch.device, seed: int | None = None
+    ):
+        """Produce a GPU-architecture-invariant randomized Torch tensor.
+
+        Args:
+            shape (list or tuple of ints): Output tensor shape.
+            dtype (torch.dtype): Output tensor type.
+            device (torch.device): Device holding the output.
+            seed (int): Optional randomization seed.
+
+        Returns:
+            tensor (torch.tensor): Randomly-generated tensor.
+        """
+        # Create a random number generator, optionally seeded
+        rng = np.random.RandomState(seed)
+
+        # Generate random numbers using the generator
+        random_array = rng.standard_normal(shape).astype(np.float32)  # Use standard_normal for normal distribution
+
+        # Convert to torch tensor and return
+        return torch.from_numpy(random_array).to(dtype=dtype, device=device)
 
 
 T = TypeVar("T", bound=Callable[..., Any])

model_t2w.py
CHANGED
@@ -25,7 +25,7 @@ from .res_sampler import COMMON_SOLVER_OPTIONS, Sampler
 from .diffusion_types import DenoisePrediction
 from .blocks import FourierFeatures
 from .pretrained_vae import BaseVAE
-from . import misc
+from .misc import misc, Color, timer
 from . import instantiate as lazy_instantiate
 from .log import log
 
@@ -96,7 +96,7 @@ class DiffusionT2WModel(torch.nn.Module):
         if hasattr(self.tokenizer, "reset_dtype"):
             self.tokenizer.reset_dtype()
 
-    @misc.timer("DiffusionModel: set_up_model")
+    @timer("DiffusionModel: set_up_model")
     def set_up_model(self, memory_format: torch.memory_format = torch.preserve_format):
         """Initialize the core model components including network, conditioner and logvar."""
         self.model = self.build_model()

model_v2w.py
CHANGED
@@ -24,7 +24,7 @@ from .conditioner import VideoExtendCondition
 from .config_base_conditioner import VideoCondBoolConfig
 from .batch_ops import batch_mul
 from .model_t2w import DiffusionT2WModel
-from . import misc
+from .misc import misc, Color, timer
 
 
 @dataclass

text2world.py
CHANGED
@@ -21,7 +21,7 @@ import torch
 
 from .inference_utils import add_common_arguments, validate_args
 from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
-from . import misc
+from .misc import misc, Color, timer
 from .utils_io import read_prompts_from_file, save_video
 
 torch.enable_grad(False)

text2world_hf.py
CHANGED
@@ -6,7 +6,7 @@ from transformers import PreTrainedModel, PretrainedConfig
 from .inference_utils import add_common_arguments, validate_args
 from .world_generation_pipeline import DiffusionText2WorldGenerationPipeline
 from .log import log
-from . import misc
+from .misc import misc, Color, timer
 from .utils_io import read_prompts_from_file, save_video
 
 

video2world.py
CHANGED
@@ -21,7 +21,7 @@ import torch
 
 from .inference_utils import add_common_arguments, check_input_frames, validate_args
 from .world_generation_pipeline import DiffusionVideo2WorldGenerationPipeline
-from . import misc
+from .misc import misc, Color, timer
 from .utils_io import read_prompts_from_file, save_video
 
 torch.enable_grad(False)

video_content_safety_filter.py
CHANGED
@@ -26,7 +26,7 @@ from .guardrail_core import ContentSafetyGuardrail, GuardrailRunner
 from .guardrail_io_utils import get_video_filepaths, read_video
 from .video_content_safety_filter_model import ModelConfig, VideoSafetyModel
 from .video_content_safety_filter_vision_encoder import SigLIPEncoder
-from . import misc
+from .misc import misc, Color, timer
 
 DEFAULT_CHECKPOINT_DIR = "checkpoints/Cosmos-1.0-Guardrail/video_content_safety_filter"
 
@@ -178,7 +178,7 @@ def main(args):
     runner = GuardrailRunner(safety_models=[video_filter], generic_safe_msg="Video is safe")
 
     for filepath in filepaths:
-        with misc.timer("video content safety filter"):
+        with timer("video content safety filter"):
             _ = runner.run_safety_check(filepath)