Commit 3b5290e (verified) by jbilcke-hf (HF Staff)
Parent(s): fa8e04c

Update handler.py

Files changed (1):
  1. handler.py (+199, -17)
handler.py CHANGED
@@ -1,14 +1,31 @@
 from typing import Dict, Any
 import os
 import shutil
-from pathlib import Path
+import gc
 import time
-from datetime import datetime
+from pathlib import Path
 import argparse
+from datetime import datetime
 from loguru import logger
+import torch
+import base64
+
 from hyvideo.utils.file_utils import save_videos_grid
 from hyvideo.inference import HunyuanVideoSampler
-from hyvideo.constants import NEGATIVE_PROMPT
+from hyvideo.constants import NEGATIVE_PROMPT, VAE_PATH, TEXT_ENCODER_PATH, TOKENIZER_PATH
+from hyvideo.modules.attenion import get_attention_modes
+
+try:
+    import triton
+    has_triton = True
+except ImportError:
+    has_triton = False
+
+try:
+    from mmgp import offload, safetensors2, profile_type
+    has_mmgp = True
+except ImportError:
+    has_mmgp = False
 
 # Configure logger
 logger.add("handler_debug.log", rotation="500 MB")
@@ -16,10 +33,13 @@ logger.add("handler_debug.log", rotation="500 MB")
 DEFAULT_RESOLUTION = "720p"
 DEFAULT_WIDTH = 1280
 DEFAULT_HEIGHT = 720
-DEFAULT_NB_FRAMES = (4 * 30) + 1 # or 129 (note: hunyan requires an extra +1 frame)
-DEFAULT_NB_STEPS = 22 # or 50
+DEFAULT_NB_FRAMES = (4 * 30) + 1 # or 129 (note: hunyan requires an extra +1 frame)
+DEFAULT_NB_STEPS = 22 # Default for standard model
 DEFAULT_FPS = 24
 
+# Get supported attention modes
+attention_modes_supported = get_attention_modes()
+
 def setup_vae_path(vae_path: Path) -> Path:
     """Create a temporary directory with correctly named VAE config file"""
     tmp_vae_dir = Path("/tmp/vae")
@@ -124,14 +144,72 @@ def get_default_args():
     parser.add_argument("--ulysses-degree", type=int, default=1)
     parser.add_argument("--ring-degree", type=int, default=1)
 
+    # Added from gradio server
+    parser.add_argument("--attention", type=str, default="auto",
+                        choices=["auto", "sdpa", "flash", "sage", "sage2", "xformers"])
+    parser.add_argument("--profile", type=int, default=1) # HighRAM_HighVRAM
+    parser.add_argument("--quantize-transformer", action="store_true", default=False)
+    parser.add_argument("--tea-cache", type=float, default=0.0)
+    parser.add_argument("--compile", action="store_true", default=False)
+    parser.add_argument("--enable-riflex", action="store_true", default=True)
+    parser.add_argument("--vae-config", type=int, default=0)
+
     # Parse with empty args list to avoid reading sys.argv
     args = parser.parse_args([])
 
     return args
 
+def get_auto_attention():
+    """Select the best available attention mode"""
+    for attn in ["sage2", "sage", "sdpa"]:
+        if attn in attention_modes_supported:
+            return attn
+    return "sdpa"
+
+def setup_vae_config(device_mem_capacity, vae, vae_config=0):
+    """Configure VAE tiling based on available VRAM"""
+    if vae_config == 0:
+        # Auto-select based on VRAM
+        if device_mem_capacity >= 24000:
+            use_vae_config = 1
+        elif device_mem_capacity >= 16000:
+            use_vae_config = 3
+        elif device_mem_capacity >= 12000:
+            use_vae_config = 4
+        else:
+            use_vae_config = 5
+    else:
+        use_vae_config = vae_config
+
+    # VAE tiling configuration options
+    if use_vae_config == 1:
+        sample_tsize = 32
+        sample_size = 256
+    elif use_vae_config == 2:
+        sample_tsize = 64
+        sample_size = 192
+    elif use_vae_config == 3:
+        sample_tsize = 32
+        sample_size = 192
+    elif use_vae_config == 4:
+        sample_tsize = 16
+        sample_size = 256
+    else:
+        sample_tsize = 16
+        sample_size = 192
+
+    # Apply settings
+    vae.tile_sample_min_tsize = sample_tsize
+    vae.tile_latent_min_tsize = sample_tsize // vae.time_compression_ratio
+    vae.tile_sample_min_size = sample_size
+    vae.tile_latent_min_size = int(sample_size / (2 ** (len(vae.config.block_out_channels) - 1)))
+    vae.tile_overlap_factor = 0.25
+
+    return use_vae_config
+
 class EndpointHandler:
     def __init__(self, path: str = ""):
-        """Initialize the handler with model path and default config."""
+        """Initialize the handler with model path and config."""
         logger.info(f"Initializing EndpointHandler with path: {path}")
 
         # Use default args instead of parsing from command line
@@ -144,14 +222,22 @@ class EndpointHandler:
         # Set up model paths
         self.args.model_base = path
 
-        # Set paths for model components
-        dit_weight_path = Path(path) / "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt"
-        original_vae_path = Path(path) / "hunyuan-video-t2v-720p/vae"
-
-        # to save on memory, we activate fp8 weights and we override the previous dit_weight_path setting
+        # Model configurations
+        self.init_model_paths(path)
+        self.configure_model()
+
+        # Initialize model
+        self.initialize_model()
+
+    def init_model_paths(self, path):
+        """Setup paths for model components"""
+        # We'll use the FP8 model for memory efficiency
         self.args.use_fp8 = True
+
+        # Model component paths
         dit_weight_path = Path(path) / "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states_fp8.pt"
-
+        original_vae_path = Path(path) / "hunyuan-video-t2v-720p/vae"
+
         # Log all critical paths
         logger.info(f"Model base path: {self.args.model_base}")
         logger.info(f"DiT weight path: {dit_weight_path}")
@@ -170,7 +256,6 @@
         tmp_vae_path = setup_vae_path(original_vae_path)
 
         # Override the VAE path in constants to use our temporary directory
-        from hyvideo.constants import VAE_PATH, TEXT_ENCODER_PATH, TOKENIZER_PATH
         VAE_PATH["884-16c-hy"] = str(tmp_vae_path)
         logger.info(f"Updated VAE_PATH to: {VAE_PATH['884-16c-hy']}")
 
@@ -196,16 +281,83 @@
         logger.info(f"TOKENIZER_PATH['clipL']: {TOKENIZER_PATH['clipL']}")
 
         self.args.dit_weight = str(dit_weight_path)
+
+    def configure_model(self):
+        """Configure model based on available hardware and settings"""
+        # Set attention mode (auto-select best available if set to 'auto')
+        if self.args.attention == "auto":
+            self.attention_mode = get_auto_attention()
+        elif self.args.attention in attention_modes_supported:
+            self.attention_mode = self.args.attention
+        else:
+            logger.warning(f"Attention mode {self.args.attention} not supported. Falling back to sdpa.")
+            self.attention_mode = "sdpa"
+
+        logger.info(f"Using attention mode: {self.attention_mode}")
 
-        # Initialize model
-        models_root_path = Path(path)
+        # Set compilation flag based on Triton availability
+        if self.args.compile and not has_triton:
+            logger.warning("Compilation requested but Triton not available. Compilation disabled.")
+            self.args.compile = False
+
+        # Set profile based on memory configuration
+        # We default to HighRAM_HighVRAM (1) as specified
+        if has_mmgp:
+            self.profile = self.args.profile
+            logger.info(f"Using memory profile: {self.profile}")
+        else:
+            logger.warning("MMGP not available. Memory profiles not used.")
+
+    def initialize_model(self):
+        """Initialize the model with configured settings"""
+        models_root_path = Path(self.args.model_base)
         if not models_root_path.exists():
             raise ValueError(f"models_root_path does not exist: {models_root_path}")
 
         try:
             logger.info("Attempting to initialize HunyuanVideoSampler...")
+
+            # Apply attention mode setting
+            self.args.attention = self.attention_mode
+
             self.model = HunyuanVideoSampler.from_pretrained(models_root_path, args=self.args)
+
+            # Set attention mode for transformer blocks
+            if hasattr(self.model, 'pipeline') and hasattr(self.model.pipeline, 'transformer'):
+                transformer = self.model.pipeline.transformer
+                transformer.attention_mode = self.attention_mode
+                # Apply to all blocks
+                if hasattr(transformer, 'double_blocks'):
+                    for module in transformer.double_blocks:
+                        module.attention_mode = self.attention_mode
+                if hasattr(transformer, 'single_blocks'):
+                    for module in transformer.single_blocks:
+                        module.attention_mode = self.attention_mode
+
+                # Enable compilation if requested
+                if self.args.compile:
+                    transformer.any_compilation = True
+                    logger.info("PyTorch compilation enabled for transformer")
+
+                # Enable TeaCache if requested
+                if self.args.tea_cache > 0:
+                    transformer.enable_teacache = True
+                    transformer.rel_l1_thresh = self.args.tea_cache
+                    logger.info(f"TeaCache enabled with threshold: {self.args.tea_cache}")
+                else:
+                    transformer.enable_teacache = False
+
+            # Apply VAE tiling configuration if supported
+            if hasattr(self.model, 'vae'):
+                if torch.cuda.is_available():
+                    device_mem_capacity = torch.cuda.get_device_properties(0).total_memory / 1048576
+                    vae_config = setup_vae_config(device_mem_capacity, self.model.vae, self.args.vae_config)
+                    logger.info(f"Configured VAE tiling with config: {vae_config}")
+                else:
+                    logger.warning("CUDA not available, using default VAE configuration")
+
             logger.info("Successfully initialized HunyuanVideoSampler")
+
         except Exception as e:
             logger.error(f"Error initializing model: {str(e)}")
             raise
@@ -232,12 +384,27 @@
         guidance_scale = float(data.pop("guidance_scale", 1.0))
         flow_shift = float(data.pop("flow_shift", 7.0))
         embedded_guidance_scale = float(data.pop("embedded_guidance_scale", 6.0))
+        enable_riflex = data.pop("enable_riflex", self.args.enable_riflex)
 
         logger.info(f"Processing with parameters: width={width}, height={height}, "
                     f"video_length={video_length}, seed={seed}, "
                     f"num_inference_steps={num_inference_steps}")
 
         try:
+            # Set up TeaCache for this generation if enabled
+            if hasattr(self.model.pipeline, 'transformer') and self.model.pipeline.transformer.enable_teacache:
+                transformer = self.model.pipeline.transformer
+                transformer.num_steps = num_inference_steps
+                transformer.cnt = 0
+                transformer.accumulated_rel_l1_distance = 0
+                transformer.previous_modulated_input = None
+                transformer.previous_residual = None
+
+            # Clean up memory before generation
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
             # Run inference
             outputs = self.model.predict(
                 prompt=prompt,
@@ -251,7 +418,8 @@
                 num_videos_per_prompt=1,
                 flow_shift=flow_shift,
                 batch_size=1,
-                embedded_guidance_scale=embedded_guidance_scale
+                embedded_guidance_scale=embedded_guidance_scale,
+                enable_riflex=enable_riflex
             )
 
             # Get the video tensor
@@ -265,7 +433,6 @@
             # Read video file and convert to base64
            with open(temp_path, "rb") as f:
                 video_bytes = f.read()
-            import base64
             video_base64 = base64.b64encode(video_bytes).decode()
 
             # Add MP4 data URI prefix
@@ -274,10 +441,25 @@
             # Cleanup
             os.remove(temp_path)
 
+            # Clean up memory after generation
+            if has_mmgp and hasattr(offload, 'last_offload_obj'):
+                offload.last_offload_obj.unload_all()
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
             logger.info("Successfully generated and encoded video")
 
             return video_data_uri
 
         except Exception as e:
             logger.error(f"Error during video generation: {str(e)}")
+
+            # Clean up memory after error
+            if has_mmgp and hasattr(offload, 'last_offload_obj'):
+                offload.last_offload_obj.unload_all()
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
             raise
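
For reference, a minimal sketch of how the updated handler could be exercised end to end. It assumes the model weights are mounted at /repository (a typical Inference Endpoints layout), that __call__ takes the request dict directly, and that keys such as "prompt", "width", "height", "video_length", "seed" and "num_inference_steps" are read by handler code outside this diff; only "guidance_scale", "flow_shift", "embedded_guidance_scale" and "enable_riflex" are visible above, and the exact data URI prefix is likewise assumed.

import base64
from handler import EndpointHandler

# Assumed mount point for the HunyuanVideo weights; adjust to your deployment.
handler = EndpointHandler(path="/repository")

request = {
    "prompt": "A cat walks on the grass, realistic style.",  # assumed key
    "width": 1280,                   # assumed key, matches DEFAULT_WIDTH
    "height": 720,                   # assumed key, matches DEFAULT_HEIGHT
    "video_length": 121,             # assumed key, matches DEFAULT_NB_FRAMES
    "seed": 42,                      # assumed key
    "num_inference_steps": 22,       # assumed key, matches DEFAULT_NB_STEPS
    "guidance_scale": 1.0,           # popped by the handler (see diff)
    "flow_shift": 7.0,               # popped by the handler (see diff)
    "embedded_guidance_scale": 6.0,  # popped by the handler (see diff)
    "enable_riflex": True,           # popped by the handler (see diff)
}

# The handler returns the generated video as a base64 data URI string.
video_data_uri = handler(request)
_, b64_payload = video_data_uri.split(",", 1)
with open("output.mp4", "wb") as f:
    f.write(base64.b64decode(b64_payload))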