import gradio as gr import torch import os import gc import numpy as np import tempfile from typing import Optional, Tuple import time # ZeroGPU support try: import spaces SPACES_AVAILABLE = True except ImportError: SPACES_AVAILABLE = False class spaces: @staticmethod def GPU(duration=300): def decorator(func): return func return decorator # Environment IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true" IS_SPACES = os.environ.get("SPACE_ID") is not None HAS_CUDA = torch.cuda.is_available() print(f"🚀 H200 Premium Setup: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}") # PREMIUM MODELS ONLY - No low quality fallbacks PREMIUM_MODELS = [ { "id": "THUDM/CogVideoX-5b", "name": "CogVideoX-5B", "pipeline_class": "CogVideoXPipeline", "resolution_options": [(720, 480), (480, 720)], "max_frames": 49, "dtype": torch.bfloat16, "fps": 8, "priority": 1, "description": "5B parameter video model - high quality" }, { "id": "THUDM/CogVideoX-2b", "name": "CogVideoX-2B", "pipeline_class": "CogVideoXPipeline", "resolution_options": [(720, 480), (480, 720)], "max_frames": 49, "dtype": torch.bfloat16, "fps": 8, "priority": 2, "description": "2B parameter model - faster generation" }, { "id": "Lightricks/LTX-Video", "name": "LTX-Video", "pipeline_class": "DiffusionPipeline", "resolution_options": [(512, 512), (768, 768)], "max_frames": 121, # LTX supports longer videos "dtype": torch.bfloat16, "fps": 24, # Higher FPS "priority": 3, "description": "Professional video generation model" } ] # Global variables MODEL = None MODEL_INFO = None LOADING_LOGS = [] def log_loading(message): """Enhanced logging with timestamps""" global LOADING_LOGS timestamp = time.strftime('%H:%M:%S') formatted_msg = f"[{timestamp}] {message}" print(formatted_msg) LOADING_LOGS.append(formatted_msg) def get_h200_memory(): """Get detailed H200 memory stats""" if HAS_CUDA: try: total = torch.cuda.get_device_properties(0).total_memory / (1024**3) allocated = torch.cuda.memory_allocated(0) / (1024**3) reserved = torch.cuda.memory_reserved(0) / (1024**3) return total, allocated, reserved except: return 0, 0, 0 return 0, 0, 0 def load_premium_model(): """Load premium models only - no fallbacks""" global MODEL, MODEL_INFO, LOADING_LOGS if MODEL is not None: return True LOADING_LOGS = [] log_loading("🎯 H200 Premium Model Loading - QUALITY PRIORITY") total_mem, allocated_mem, reserved_mem = get_h200_memory() log_loading(f"💾 H200 Memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved") # Sort by priority (premium first) sorted_models = sorted(PREMIUM_MODELS, key=lambda x: x["priority"]) for model_config in sorted_models: if try_load_premium_model(model_config): return True log_loading("❌ All premium models failed - check model availability") return False def try_load_premium_model(config): """Try loading premium model with optimized settings""" global MODEL, MODEL_INFO model_id = config["id"] model_name = config["name"] log_loading(f"🔄 Loading {model_name} (Premium)...") log_loading(f" 📋 Target: {config['pipeline_class']}, {config['max_frames']} frames, {config['fps']} fps") try: # Clear H200 memory if HAS_CUDA: torch.cuda.empty_cache() torch.cuda.synchronize() gc.collect() # Import specific pipeline if config["pipeline_class"] == "CogVideoXPipeline": from diffusers import CogVideoXPipeline PipelineClass = CogVideoXPipeline log_loading(f" 📥 Using CogVideoXPipeline...") else: from diffusers import DiffusionPipeline PipelineClass = DiffusionPipeline log_loading(f" 📥 Using DiffusionPipeline...") # Load with premium settings log_loading(f" 🔄 Downloading/Loading model...") pipe = PipelineClass.from_pretrained( model_id, torch_dtype=config["dtype"], trust_remote_code=True, # No variant, no use_safetensors restrictions ) # Move to H200 and optimize if HAS_CUDA: log_loading(f" 📱 Moving to H200 CUDA...") pipe = pipe.to("cuda") # Premium optimizations for H200's 69.5GB if hasattr(pipe, 'enable_vae_slicing'): pipe.enable_vae_slicing() log_loading(f" ⚡ VAE slicing enabled") if hasattr(pipe, 'enable_vae_tiling'): pipe.enable_vae_tiling() log_loading(f" ⚡ VAE tiling enabled") if hasattr(pipe, 'enable_memory_efficient_attention'): pipe.enable_memory_efficient_attention() log_loading(f" ⚡ Memory efficient attention enabled") # For H200's large memory, keep everything in GPU log_loading(f" 🚀 Keeping full model in H200 GPU memory") # Memory check after loading total_mem, allocated_mem, reserved_mem = get_h200_memory() log_loading(f" 💾 Post-load: {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved") # Validate model capabilities expected_frames = config["max_frames"] expected_fps = config["fps"] log_loading(f" ✅ {model_name} ready: {expected_frames} max frames @ {expected_fps} fps") MODEL = pipe MODEL_INFO = config log_loading(f"🎯 SUCCESS: {model_name} loaded for premium generation!") return True except Exception as e: log_loading(f"❌ {model_name} failed: {str(e)}") # Clear memory thoroughly if HAS_CUDA: torch.cuda.empty_cache() torch.cuda.synchronize() gc.collect() return False @spaces.GPU(duration=300) if SPACES_AVAILABLE else lambda x: x def generate_premium_video( prompt: str, negative_prompt: str = "", num_frames: int = 49, resolution: str = "720x480", num_inference_steps: int = 50, guidance_scale: float = 6.0, seed: int = -1 ) -> Tuple[Optional[str], str]: """Generate premium quality video with proper parameters""" global MODEL, MODEL_INFO # Load premium model if not load_premium_model(): logs = "\n".join(LOADING_LOGS[-5:]) return None, f"❌ No premium models available\n\nLogs:\n{logs}" # Input validation if not prompt.strip(): return None, "❌ Please enter a detailed prompt for premium generation." if len(prompt) < 10: return None, "❌ Please provide a more detailed prompt (minimum 10 characters)." # Parse resolution try: width, height = map(int, resolution.split('x')) except: width, height = MODEL_INFO["resolution_options"][0] # Validate resolution if (width, height) not in MODEL_INFO["resolution_options"]: width, height = MODEL_INFO["resolution_options"][0] log_loading(f"⚠️ Resolution adjusted to {width}x{height}") # Validate frames max_frames = MODEL_INFO["max_frames"] num_frames = min(max(num_frames, 16), max_frames) # Minimum 16 for quality # Model-specific parameter optimization if MODEL_INFO["name"].startswith("CogVideoX"): # CogVideoX optimal parameters guidance_scale = max(6.0, min(guidance_scale, 7.0)) # CogVideoX sweet spot num_inference_steps = max(50, num_inference_steps) # Higher steps for quality elif MODEL_INFO["name"] == "LTX-Video": # LTX-Video optimal parameters guidance_scale = max(7.0, min(guidance_scale, 8.5)) # LTX sweet spot num_inference_steps = max(30, num_inference_steps) try: # H200 memory preparation start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0 # Enhanced seed handling if seed == -1: seed = np.random.randint(0, 2**32 - 1) device = "cuda" if HAS_CUDA else "cpu" generator = torch.Generator(device=device).manual_seed(seed) log_loading(f"🎬 PREMIUM GENERATION START") log_loading(f"📋 Model: {MODEL_INFO['name']}") log_loading(f"📐 Resolution: {width}x{height}") log_loading(f"🎞️ Frames: {num_frames} @ {MODEL_INFO['fps']} fps = {num_frames/MODEL_INFO['fps']:.1f}s video") log_loading(f"⚙️ Steps: {num_inference_steps}, Guidance: {guidance_scale}") log_loading(f"📝 Prompt: {prompt[:100]}...") start_time = time.time() # Premium generation with optimal autocast with torch.autocast(device, dtype=MODEL_INFO["dtype"], enabled=HAS_CUDA): # Prepare generation parameters gen_kwargs = { "prompt": prompt, "height": height, "width": width, "num_frames": num_frames, "num_inference_steps": num_inference_steps, "guidance_scale": guidance_scale, "generator": generator, } # Add negative prompt for quality if negative_prompt.strip(): gen_kwargs["negative_prompt"] = negative_prompt else: # Default negative prompt for premium quality default_negative = "blurry, low quality, distorted, pixelated, compression artifacts, watermark, text, signature, amateur, static, boring" gen_kwargs["negative_prompt"] = default_negative log_loading(f"🚫 Using default negative prompt for quality") # Model-specific parameters if MODEL_INFO["name"].startswith("CogVideoX"): gen_kwargs["num_videos_per_prompt"] = 1 log_loading(f"🎥 CogVideoX generation starting...") # Generate with progress log_loading(f"🚀 H200 generation in progress...") result = MODEL(**gen_kwargs) end_time = time.time() generation_time = end_time - start_time # Extract video frames if hasattr(result, 'frames'): video_frames = result.frames[0] log_loading(f"📹 Extracted {len(video_frames)} frames") elif hasattr(result, 'videos'): video_frames = result.videos[0] log_loading(f"📹 Extracted video tensor: {video_frames.shape}") else: log_loading(f"❌ Unknown result format: {type(result)}") return None, "❌ Could not extract video frames from result" # Export with proper FPS target_fps = MODEL_INFO["fps"] actual_duration = num_frames / target_fps with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file: from diffusers.utils import export_to_video export_to_video(video_frames, tmp_file.name, fps=target_fps) video_path = tmp_file.name log_loading(f"🎬 Exported to {tmp_file.name} @ {target_fps} fps") # Memory stats end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0 memory_used = end_memory - start_memory # Success report success_msg = f"""🎯 **PREMIUM H200 VIDEO GENERATED** 🤖 **Model:** {MODEL_INFO['name']} 📝 **Prompt:** {prompt} 🎬 **Video:** {num_frames} frames @ {target_fps} fps = **{actual_duration:.1f} seconds** 📐 **Resolution:** {width}x{height} ⚙️ **Quality:** {num_inference_steps} inference steps 🎯 **Guidance:** {guidance_scale} 🎲 **Seed:** {seed} ⏱️ **Generation Time:** {generation_time:.1f}s ({generation_time/60:.1f} minutes) 🖥️ **Device:** H200 MIG (69.5GB) 💾 **Memory Used:** {memory_used:.1f}GB 📋 **Model Notes:** {MODEL_INFO['description']} **🎥 Video Quality:** Premium quality with {num_frames} frames over {actual_duration:.1f} seconds""" log_loading(f"✅ PREMIUM generation completed: {actual_duration:.1f}s video in {generation_time:.1f}s") return video_path, success_msg except torch.cuda.OutOfMemoryError: if HAS_CUDA: torch.cuda.empty_cache() gc.collect() return None, "❌ H200 memory exceeded. Try reducing frames or resolution." except Exception as e: if HAS_CUDA: torch.cuda.empty_cache() gc.collect() error_msg = str(e) log_loading(f"❌ Generation error: {error_msg}") return None, f"❌ Premium generation failed: {error_msg}" def get_model_status(): """Get current premium model status""" if MODEL is None: return "⏳ **No premium model loaded** - will auto-load on generation" fps = MODEL_INFO["fps"] max_frames = MODEL_INFO["max_frames"] max_duration = max_frames / fps resolutions = ", ".join([f"{w}x{h}" for w, h in MODEL_INFO["resolution_options"]]) return f"""🎯 **{MODEL_INFO['name']} Ready** **📋 Premium Capabilities:** - **Max Duration:** {max_duration:.1f} seconds ({max_frames} frames @ {fps} fps) - **Resolutions:** {resolutions} - **Quality:** {MODEL_INFO['description']} **⚡ H200 Optimizations:** - Full model in GPU memory - Memory efficient attention - VAE optimizations enabled **💡 This model produces {max_duration:.1f} second videos with {max_frames} frames!**""" def get_loading_logs(): """Get formatted loading logs""" global LOADING_LOGS if not LOADING_LOGS: return "No loading attempts yet." return "\n".join(LOADING_LOGS) def suggest_premium_settings(): """Suggest optimal settings for current model""" if MODEL is None: return "Load a premium model first." model_name = MODEL_INFO['name'] max_frames = MODEL_INFO['max_frames'] fps = MODEL_INFO['fps'] max_duration = max_frames / fps return f"""## 🎯 Optimal Settings for {model_name} **🚀 Maximum Quality:** - Frames: {max_frames} (full {max_duration:.1f} second video) - Inference Steps: 50+ - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5} - Resolution: {MODEL_INFO['resolution_options'][-1]} **⚖️ Balanced (Recommended):** - Frames: {max_frames//2} ({max_frames//2/fps:.1f} second video) - Inference Steps: 35-50 - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5} **⚡ Fast Test:** - Frames: 25 ({25/fps:.1f} second video) - Inference Steps: 30 - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5} **📝 Premium Prompting Tips:** - Be very specific and detailed - Include camera movements: "slow zoom", "tracking shot" - Describe lighting: "golden hour", "cinematic lighting" - Add style: "professional cinematography", "8K quality" - Mention motion: "smooth movement", "graceful motion" **Example Premium Prompt:** "A majestic golden eagle soaring gracefully through misty mountain peaks during golden hour, cinematic tracking shot with shallow depth of field, professional wildlife cinematography, smooth gliding motion, warm sunset lighting, 8K quality" Remember: Longer videos need more detailed prompts to maintain coherence!""" # Create premium interface with gr.Blocks(title="H200 Premium Video Generator", theme=gr.themes.Glass()) as demo: gr.Markdown(""" # 🎯 H200 Premium Video Generator **Premium Models Only** • **Long-Form Videos** • **Professional Quality** *CogVideoX-5B • LTX-Video • No Low-Quality Fallbacks* """) # Premium status with gr.Row(): gr.Markdown("""