ozilion committed
Commit c40d82c · verified · 1 Parent(s): 05424ef

Update app.py

Files changed (1)
  1. app.py +342 -253
app.py CHANGED
@@ -7,7 +7,7 @@ import tempfile
  from typing import Optional, Tuple
  import time
 
- # ZeroGPU support (even without detection)
  try:
  import spaces
  SPACES_AVAILABLE = True
@@ -15,7 +15,7 @@ except ImportError:
  SPACES_AVAILABLE = False
  class spaces:
  @staticmethod
- def GPU(duration=240):
  def decorator(func): return func
  return decorator
 
@@ -24,69 +24,42 @@ IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
  IS_SPACES = os.environ.get("SPACE_ID") is not None
  HAS_CUDA = torch.cuda.is_available()
 
- print(f"🚀 H200 MIG Environment: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
 
- # Working models based on your diagnostic
- WORKING_MODELS = [
  {
- "id": "Lightricks/LTX-Video",
- "name": "LTX-Video",
- "pipeline_class": "DiffusionPipeline",
- "variant": None, # No fp16 variant available
- "use_safetensors": False, # Use .bin files
- "resolution": (512, 512),
- "max_frames": 50,
  "dtype": torch.bfloat16,
  "priority": 1,
- "description": "LTX-Video via DiffusionPipeline (no variant)"
  },
  {
- "id": "THUDM/CogVideoX-5b",
- "name": "CogVideoX-5B",
- "pipeline_class": "CogVideoXPipeline",
- "variant": None,
- "use_safetensors": True,
- "resolution": (720, 480),
  "max_frames": 49,
  "dtype": torch.bfloat16,
  "priority": 2,
- "description": "CogVideo 5B model - proven to work"
  },
  {
- "id": "cerspense/zeroscope_v2_576w",
- "name": "Zeroscope V2",
  "pipeline_class": "DiffusionPipeline",
- "variant": None, # No fp16 variant
- "use_safetensors": False, # Use .bin files
- "resolution": (576, 320),
- "max_frames": 24,
- "dtype": torch.float16,
  "priority": 3,
- "description": "Zeroscope without safetensors"
- },
- {
- "id": "damo-vilab/text-to-video-ms-1.7b",
- "name": "ModelScope T2V",
- "pipeline_class": "DiffusionPipeline",
- "variant": None,
- "use_safetensors": False,
- "resolution": (256, 256),
- "max_frames": 16,
- "dtype": torch.float16,
- "priority": 4,
- "description": "ModelScope reliable fallback"
- },
- {
- "id": "ali-vilab/text-to-video-ms-1.7b",
- "name": "AliVilab T2V",
- "pipeline_class": "DiffusionPipeline",
- "variant": None,
- "use_safetensors": False,
- "resolution": (256, 256),
- "max_frames": 16,
- "dtype": torch.float16,
- "priority": 5,
- "description": "AliVilab alternative"
  }
  ]
 
@@ -96,194 +69,230 @@ MODEL_INFO = None
  LOADING_LOGS = []
 
  def log_loading(message):
- """Enhanced logging"""
  global LOADING_LOGS
- print(message)
- LOADING_LOGS.append(f"{time.strftime('%H:%M:%S')} - {message}")
 
  def get_h200_memory():
- """Get H200 MIG memory stats"""
  if HAS_CUDA:
  try:
  total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
  allocated = torch.cuda.memory_allocated(0) / (1024**3)
- return total, allocated
  except:
- return 0, 0
- return 0, 0
 
- def load_working_model():
- """Load first working model with H200 MIG optimizations"""
  global MODEL, MODEL_INFO, LOADING_LOGS
 
  if MODEL is not None:
  return True
 
  LOADING_LOGS = []
- log_loading("🚀 H200 MIG (69.5GB) model loading started...")
 
- total_mem, allocated_mem = get_h200_memory()
- log_loading(f"💾 Initial H200 memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB used")
 
- # Sort by priority
- sorted_models = sorted(WORKING_MODELS, key=lambda x: x["priority"])
 
  for model_config in sorted_models:
- if try_load_specific_model(model_config):
  return True
 
- log_loading("❌ All models failed on H200 MIG")
  return False
 
- def try_load_specific_model(config):
- """Try loading a specific model with exact configuration"""
  global MODEL, MODEL_INFO
 
  model_id = config["id"]
  model_name = config["name"]
 
- log_loading(f"🔄 Attempting {model_name}...")
- log_loading(f" 📋 Config: {config['pipeline_class']}, variant={config['variant']}, safetensors={config['use_safetensors']}")
 
  try:
- # Clear memory first
  if HAS_CUDA:
  torch.cuda.empty_cache()
  gc.collect()
 
- # Import appropriate pipeline
  if config["pipeline_class"] == "CogVideoXPipeline":
  from diffusers import CogVideoXPipeline
  PipelineClass = CogVideoXPipeline
  else:
- from diffusers import DiffusionPipeline
  PipelineClass = DiffusionPipeline
 
- # Prepare loading parameters
- load_params = {
- "torch_dtype": config["dtype"],
- "trust_remote_code": True
- }
-
- # Add variant only if specified
- if config["variant"]:
- load_params["variant"] = config["variant"]
-
- # Add safetensors setting
- if config["use_safetensors"]:
- load_params["use_safetensors"] = True
-
- log_loading(f" 📥 Loading with params: {load_params}")
-
- # Load model
- pipe = PipelineClass.from_pretrained(model_id, **load_params)
-
- # Move to H200 MIG GPU
  if HAS_CUDA:
  pipe = pipe.to("cuda")
- log_loading(f" 📱 Moved to H200 MIG CUDA")
-
- # H200 MIG optimizations (69.5GB is plenty!)
- if hasattr(pipe, 'enable_vae_slicing'):
- pipe.enable_vae_slicing()
- log_loading(f" ⚡ VAE slicing enabled")
-
- if hasattr(pipe, 'enable_vae_tiling'):
- pipe.enable_vae_tiling()
- log_loading(f" ⚡ VAE tiling enabled")
-
- if hasattr(pipe, 'enable_memory_efficient_attention'):
- pipe.enable_memory_efficient_attention()
- log_loading(f" ⚡ Memory efficient attention enabled")
-
- # Don't use CPU offload on H200 - keep everything in GPU
- log_loading(f" 🚀 Keeping model fully in H200 GPU memory")
 
  # Memory check after loading
- total_mem, allocated_mem = get_h200_memory()
- log_loading(f" 💾 Post-load memory: {allocated_mem:.1f}GB used / {total_mem:.1f}GB total")
 
  MODEL = pipe
  MODEL_INFO = config
 
- log_loading(f"✅ {model_name} loaded successfully on H200 MIG!")
  return True
 
  except Exception as e:
  log_loading(f"❌ {model_name} failed: {str(e)}")
- # Clear memory before next attempt
  if HAS_CUDA:
  torch.cuda.empty_cache()
  gc.collect()
  return False
 
- @spaces.GPU(duration=240) if SPACES_AVAILABLE else lambda x: x
- def generate_video(
  prompt: str,
  negative_prompt: str = "",
- num_frames: int = 25,
- num_inference_steps: int = 25,
- guidance_scale: float = 7.5,
  seed: int = -1
  ) -> Tuple[Optional[str], str]:
- """Generate video with H200 MIG power"""
 
  global MODEL, MODEL_INFO
 
- # Load model if needed
- if not load_working_model():
- logs = "\n".join(LOADING_LOGS[-10:]) # Last 10 log entries
- return None, f"❌ Model loading failed on H200 MIG\n\nRecent logs:\n{logs}"
 
  # Input validation
  if not prompt.strip():
- return None, "❌ Please enter a valid prompt."
 
- # Get model constraints
  max_frames = MODEL_INFO["max_frames"]
- width, height = MODEL_INFO["resolution"]
 
- # Adjust parameters for model
- num_frames = min(max(num_frames, 8), max_frames)
 
  try:
- # H200 MIG memory management
  start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
 
- # Set seed
  if seed == -1:
  seed = np.random.randint(0, 2**32 - 1)
 
  device = "cuda" if HAS_CUDA else "cpu"
  generator = torch.Generator(device=device).manual_seed(seed)
 
- log_loading(f"🎬 H200 MIG Generation: {MODEL_INFO['name']}")
- log_loading(f"📏 {width}x{height}, {num_frames} frames, {num_inference_steps} steps")
 
  start_time = time.time()
 
- # Generate with H200 MIG autocast
- with torch.autocast(device, dtype=MODEL_INFO["dtype"]):
- # Prepare generation arguments
  gen_kwargs = {
  "prompt": prompt,
- "num_frames": num_frames,
  "height": height,
  "width": width,
  "num_inference_steps": num_inference_steps,
  "guidance_scale": guidance_scale,
- "generator": generator
  }
 
- # Add negative prompt if provided
  if negative_prompt.strip():
  gen_kwargs["negative_prompt"] = negative_prompt
 
- # Model-specific adjustments
- if MODEL_INFO["name"] == "CogVideoX-5B":
  gen_kwargs["num_videos_per_prompt"] = 1
 
- log_loading(f"🚀 Starting H200 MIG generation...")
  result = MODEL(**gen_kwargs)
 
  end_time = time.time()
@@ -292,213 +301,293 @@ def generate_video(
  # Extract video frames
  if hasattr(result, 'frames'):
  video_frames = result.frames[0]
  elif hasattr(result, 'videos'):
  video_frames = result.videos[0]
  else:
- return None, "❌ Could not extract video frames"
 
- # Export video
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
  from diffusers.utils import export_to_video
- fps = 8
- export_to_video(video_frames, tmp_file.name, fps=fps)
  video_path = tmp_file.name
 
  # Memory stats
  end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
  memory_used = end_memory - start_memory
 
- success_msg = f"""✅ **H200 MIG Video Generated!**
 
- 🤖 **Model:** {MODEL_INFO['name']}
  📝 **Prompt:** {prompt}
- 🎬 **Frames:** {num_frames} @ {fps} FPS
  📏 **Resolution:** {width}x{height}
- ⚙️ **Inference Steps:** {num_inference_steps}
- 🎯 **Guidance Scale:** {guidance_scale}
  🎲 **Seed:** {seed}
- ⏱️ **Generation Time:** {generation_time:.1f}s
  🖥️ **Device:** H200 MIG (69.5GB)
  💾 **Memory Used:** {memory_used:.1f}GB
- 🎥 **Video Length:** {num_frames/fps:.1f}s
- 📋 **Notes:** {MODEL_INFO['description']}"""
 
- log_loading(f"✅ Generation completed in {generation_time:.1f}s")
 
  return video_path, success_msg
 
  except torch.cuda.OutOfMemoryError:
- torch.cuda.empty_cache()
  gc.collect()
- return None, "❌ H200 MIG memory exceeded (rare!). Try reducing parameters."
 
  except Exception as e:
  if HAS_CUDA:
  torch.cuda.empty_cache()
  gc.collect()
- return None, f"❌ H200 MIG generation failed: {str(e)}"
 
  def get_loading_logs():
- """Return formatted loading logs"""
  global LOADING_LOGS
  if not LOADING_LOGS:
  return "No loading attempts yet."
  return "\n".join(LOADING_LOGS)
 
- def get_h200_status():
- """Get H200 MIG specific status"""
- total_mem, allocated_mem = get_h200_memory()
 
- status = f"""## 🚀 H200 MIG Status
 
- **🖥️ Hardware:**
- - GPU: NVIDIA H200 MIG 3g.71gb
- - Total Memory: {total_mem:.1f} GB
- - Allocated: {allocated_mem:.1f} GB
- - Free: {total_mem - allocated_mem:.1f} GB
 
- **🤖 Current Model:**"""
-
- if MODEL is not None:
- status += f"""
- - ✅ **{MODEL_INFO['name']}** loaded and ready
- - 📏 Resolution: {MODEL_INFO['resolution']}
- - 🎬 Max Frames: {MODEL_INFO['max_frames']}
- - 💾 Memory Usage: {allocated_mem:.1f}GB
- - 📋 Details: {MODEL_INFO['description']}"""
- else:
- status += f"""
- - ⏳ No model loaded yet
- - 🔄 Will auto-load on first generation"""
-
- status += f"""
 
- **💡 H200 MIG Advantages:**
- - 69.5GB dedicated memory
- - Isolated GPU partition
- - Consistent performance
- - No interference from other workloads"""
-
- return status
 
- def force_reload():
- """Force model reload"""
- global MODEL, MODEL_INFO
- MODEL = None
- MODEL_INFO = None
- torch.cuda.empty_cache()
- gc.collect()
-
- success = load_working_model()
- logs = "\n".join(LOADING_LOGS[-5:]) # Last 5 entries
-
- return f"🔄 **Force Reload Result:** {'✅ Success' if success else '❌ Failed'}\n\nRecent logs:\n{logs}"
 
- # Create H200 MIG optimized interface
- with gr.Blocks(title="H200 MIG Video Generator", theme=gr.themes.Glass()) as demo:
 
  gr.Markdown("""
- # 🚀 H200 MIG Video Generator
 
- **NVIDIA H200 MIG 3g.71gb** • **69.5GB Memory** • **Working Models**
  """)
 
- # Status bar
  with gr.Row():
  gr.Markdown("""
- <div style="background: linear-gradient(45deg, #FF6B6B, #4ECDC4); padding: 10px; border-radius: 10px; text-align: center; color: white; font-weight: bold;">
- 🔥 H200 MIG ACTIVE - OPTIMIZED FOR YOUR SETUP 🔥
  </div>
  """)
 
- with gr.Tab("🎥 Generate Video"):
  with gr.Row():
  with gr.Column(scale=1):
  prompt_input = gr.Textbox(
- label="📝 Video Prompt",
- placeholder="A majestic eagle soaring through mountain peaks at golden hour, cinematic shot with dramatic lighting...",
- lines=4
  )
 
  negative_prompt_input = gr.Textbox(
- label="🚫 Negative Prompt",
- placeholder="blurry, low quality, distorted, pixelated, static...",
  lines=2
  )
 
- with gr.Accordion("⚙️ H200 MIG Settings", open=True):
  with gr.Row():
- num_frames = gr.Slider(8, 50, value=25, step=1, label="🎬 Frames")
- num_steps = gr.Slider(15, 50, value=25, step=1, label="⚙️ Steps")
 
  with gr.Row():
- guidance_scale = gr.Slider(1.0, 15.0, value=7.5, step=0.5, label="🎯 Guidance")
- seed = gr.Number(value=-1, precision=0, label="🎲 Seed")
 
- generate_btn = gr.Button("🚀 Generate on H200 MIG", variant="primary", size="lg")
 
  gr.Markdown("""
- **⏱️ Generation:** 1-3 minutes on H200 MIG
 
- **💡 Auto-detects:** Best working model for your setup
  """)
 
  with gr.Column(scale=1):
- video_output = gr.Video(label="🎥 H200 MIG Generated Video", height=400)
- result_text = gr.Textbox(label="📋 Generation Report", lines=10, show_copy_button=True)
 
  generate_btn.click(
- fn=generate_video,
- inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed],
  outputs=[video_output, result_text]
  )
 
- # H200 MIG optimized examples
  gr.Examples(
  examples=[
  [
- "A majestic golden eagle soaring through misty mountain peaks at sunrise",
- "blurry, low quality, static",
- 25, 25, 7.5, 42
  ],
  [
- "Ocean waves crashing against rocks during sunset, cinematic view",
- "pixelated, distorted, watermark",
- 30, 30, 8.0, 123
  ],
  [
- "A peaceful cat sleeping in a sunny garden with flowers",
- "dark, gloomy, low quality",
- 20, 20, 7.0, 456
  ],
  [
- "Time-lapse of clouds moving over a mountain landscape",
- "static, boring, blurry",
- 35, 35, 7.5, 789
  ]
  ],
- inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
  )
 
- with gr.Tab("🔧 H200 MIG Status"):
  with gr.Row():
- status_btn = gr.Button("🔍 Check H200 Status", variant="secondary")
- logs_btn = gr.Button("📋 View Loading Logs", variant="secondary")
- reload_btn = gr.Button("🔄 Force Reload", variant="secondary")
 
  status_output = gr.Markdown()
- logs_output = gr.Textbox(label="Detailed Loading Logs", lines=15, show_copy_button=True)
- reload_output = gr.Markdown()
 
- status_btn.click(fn=get_h200_status, outputs=status_output)
  logs_btn.click(fn=get_loading_logs, outputs=logs_output)
- reload_btn.click(fn=force_reload, outputs=reload_output)
 
  # Auto-load status
- demo.load(fn=get_h200_status, outputs=status_output)
 
  if __name__ == "__main__":
- demo.queue(max_size=3)
  demo.launch(
  share=False,
  server_name="0.0.0.0",
  from typing import Optional, Tuple
  import time
 
+ # ZeroGPU support
  try:
  import spaces
  SPACES_AVAILABLE = True
  SPACES_AVAILABLE = False
  class spaces:
  @staticmethod
+ def GPU(duration=300):
  def decorator(func): return func
  return decorator
 
  IS_SPACES = os.environ.get("SPACE_ID") is not None
  HAS_CUDA = torch.cuda.is_available()
 
+ print(f"🚀 H200 Premium Setup: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
 
+ # PREMIUM MODELS ONLY - No low quality fallbacks
+ PREMIUM_MODELS = [
  {
+ "id": "THUDM/CogVideoX-5b",
+ "name": "CogVideoX-5B",
+ "pipeline_class": "CogVideoXPipeline",
+ "resolution_options": [(720, 480), (480, 720)],
+ "max_frames": 49,
  "dtype": torch.bfloat16,
+ "fps": 8,
  "priority": 1,
+ "description": "5B parameter video model - high quality"
  },
  {
+ "id": "THUDM/CogVideoX-2b",
+ "name": "CogVideoX-2B",
+ "pipeline_class": "CogVideoXPipeline",
+ "resolution_options": [(720, 480), (480, 720)],
  "max_frames": 49,
  "dtype": torch.bfloat16,
+ "fps": 8,
  "priority": 2,
+ "description": "2B parameter model - faster generation"
  },
  {
+ "id": "Lightricks/LTX-Video",
+ "name": "LTX-Video",
  "pipeline_class": "DiffusionPipeline",
+ "resolution_options": [(512, 512), (768, 768)],
+ "max_frames": 121, # LTX supports longer videos
+ "dtype": torch.bfloat16,
+ "fps": 24, # Higher FPS
  "priority": 3,
+ "description": "Professional video generation model"
  }
  ]
 
  LOADING_LOGS = []
 
  def log_loading(message):
+ """Enhanced logging with timestamps"""
  global LOADING_LOGS
+ timestamp = time.strftime('%H:%M:%S')
+ formatted_msg = f"[{timestamp}] {message}"
+ print(formatted_msg)
+ LOADING_LOGS.append(formatted_msg)
 
  def get_h200_memory():
+ """Get detailed H200 memory stats"""
  if HAS_CUDA:
  try:
  total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
  allocated = torch.cuda.memory_allocated(0) / (1024**3)
+ reserved = torch.cuda.memory_reserved(0) / (1024**3)
+ return total, allocated, reserved
  except:
+ return 0, 0, 0
+ return 0, 0, 0
 
+ def load_premium_model():
+ """Load premium models only - no fallbacks"""
  global MODEL, MODEL_INFO, LOADING_LOGS
 
  if MODEL is not None:
  return True
 
  LOADING_LOGS = []
+ log_loading("🎯 H200 Premium Model Loading - QUALITY PRIORITY")
 
+ total_mem, allocated_mem, reserved_mem = get_h200_memory()
+ log_loading(f"💾 H200 Memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved")
 
+ # Sort by priority (premium first)
+ sorted_models = sorted(PREMIUM_MODELS, key=lambda x: x["priority"])
 
  for model_config in sorted_models:
+ if try_load_premium_model(model_config):
  return True
 
+ log_loading("❌ All premium models failed - check model availability")
  return False
 
+ def try_load_premium_model(config):
+ """Try loading premium model with optimized settings"""
  global MODEL, MODEL_INFO
 
  model_id = config["id"]
  model_name = config["name"]
 
+ log_loading(f"🔄 Loading {model_name} (Premium)...")
+ log_loading(f" 📋 Target: {config['pipeline_class']}, {config['max_frames']} frames, {config['fps']} fps")
 
  try:
+ # Clear H200 memory
  if HAS_CUDA:
  torch.cuda.empty_cache()
+ torch.cuda.synchronize()
  gc.collect()
 
+ # Import specific pipeline
  if config["pipeline_class"] == "CogVideoXPipeline":
  from diffusers import CogVideoXPipeline
  PipelineClass = CogVideoXPipeline
+ log_loading(f" 📥 Using CogVideoXPipeline...")
  else:
+ from diffusers import DiffusionPipeline
  PipelineClass = DiffusionPipeline
+ log_loading(f" 📥 Using DiffusionPipeline...")
+
+ # Load with premium settings
+ log_loading(f" 🔄 Downloading/Loading model...")
+ pipe = PipelineClass.from_pretrained(
+ model_id,
+ torch_dtype=config["dtype"],
+ trust_remote_code=True,
+ # No variant, no use_safetensors restrictions
+ )
 
+ # Move to H200 and optimize
  if HAS_CUDA:
+ log_loading(f" 📱 Moving to H200 CUDA...")
  pipe = pipe.to("cuda")
+
+ # Premium optimizations for H200's 69.5GB
+ if hasattr(pipe, 'enable_vae_slicing'):
+ pipe.enable_vae_slicing()
+ log_loading(f" ⚡ VAE slicing enabled")
+
+ if hasattr(pipe, 'enable_vae_tiling'):
+ pipe.enable_vae_tiling()
+ log_loading(f" ⚡ VAE tiling enabled")
+
+ if hasattr(pipe, 'enable_memory_efficient_attention'):
+ pipe.enable_memory_efficient_attention()
+ log_loading(f" ⚡ Memory efficient attention enabled")
+
+ # For H200's large memory, keep everything in GPU
+ log_loading(f" 🚀 Keeping full model in H200 GPU memory")
 
  # Memory check after loading
+ total_mem, allocated_mem, reserved_mem = get_h200_memory()
+ log_loading(f" 💾 Post-load: {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved")
+
+ # Validate model capabilities
+ expected_frames = config["max_frames"]
+ expected_fps = config["fps"]
+ log_loading(f" ✅ {model_name} ready: {expected_frames} max frames @ {expected_fps} fps")
 
  MODEL = pipe
  MODEL_INFO = config
 
+ log_loading(f"🎯 SUCCESS: {model_name} loaded for premium generation!")
  return True
 
  except Exception as e:
  log_loading(f"❌ {model_name} failed: {str(e)}")
+ # Clear memory thoroughly
  if HAS_CUDA:
  torch.cuda.empty_cache()
+ torch.cuda.synchronize()
  gc.collect()
  return False
 
+ @spaces.GPU(duration=300) if SPACES_AVAILABLE else lambda x: x
+ def generate_premium_video(
  prompt: str,
  negative_prompt: str = "",
+ num_frames: int = 49,
+ resolution: str = "720x480",
+ num_inference_steps: int = 50,
+ guidance_scale: float = 6.0,
  seed: int = -1
  ) -> Tuple[Optional[str], str]:
+ """Generate premium quality video with proper parameters"""
 
  global MODEL, MODEL_INFO
 
+ # Load premium model
+ if not load_premium_model():
+ logs = "\n".join(LOADING_LOGS[-5:])
+ return None, f"❌ No premium models available\n\nLogs:\n{logs}"
 
  # Input validation
  if not prompt.strip():
+ return None, "❌ Please enter a detailed prompt for premium generation."
+
+ if len(prompt) < 10:
+ return None, "❌ Please provide a more detailed prompt (minimum 10 characters)."
+
+ # Parse resolution
+ try:
+ width, height = map(int, resolution.split('x'))
+ except:
+ width, height = MODEL_INFO["resolution_options"][0]
+
+ # Validate resolution
+ if (width, height) not in MODEL_INFO["resolution_options"]:
+ width, height = MODEL_INFO["resolution_options"][0]
+ log_loading(f"⚠️ Resolution adjusted to {width}x{height}")
 
+ # Validate frames
  max_frames = MODEL_INFO["max_frames"]
+ num_frames = min(max(num_frames, 16), max_frames) # Minimum 16 for quality
 
+ # Model-specific parameter optimization
+ if MODEL_INFO["name"].startswith("CogVideoX"):
+ # CogVideoX optimal parameters
+ guidance_scale = max(6.0, min(guidance_scale, 7.0)) # CogVideoX sweet spot
+ num_inference_steps = max(50, num_inference_steps) # Higher steps for quality
+ elif MODEL_INFO["name"] == "LTX-Video":
+ # LTX-Video optimal parameters
+ guidance_scale = max(7.0, min(guidance_scale, 8.5)) # LTX sweet spot
+ num_inference_steps = max(30, num_inference_steps)
 
  try:
+ # H200 memory preparation
  start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
 
+ # Enhanced seed handling
  if seed == -1:
  seed = np.random.randint(0, 2**32 - 1)
 
  device = "cuda" if HAS_CUDA else "cpu"
  generator = torch.Generator(device=device).manual_seed(seed)
 
+ log_loading(f"🎬 PREMIUM GENERATION START")
+ log_loading(f"📋 Model: {MODEL_INFO['name']}")
+ log_loading(f"📏 Resolution: {width}x{height}")
+ log_loading(f"🎞️ Frames: {num_frames} @ {MODEL_INFO['fps']} fps = {num_frames/MODEL_INFO['fps']:.1f}s video")
+ log_loading(f"⚙️ Steps: {num_inference_steps}, Guidance: {guidance_scale}")
+ log_loading(f"📝 Prompt: {prompt[:100]}...")
 
  start_time = time.time()
 
+ # Premium generation with optimal autocast
+ with torch.autocast(device, dtype=MODEL_INFO["dtype"], enabled=HAS_CUDA):
+
+ # Prepare generation parameters
  gen_kwargs = {
  "prompt": prompt,
  "height": height,
  "width": width,
+ "num_frames": num_frames,
  "num_inference_steps": num_inference_steps,
  "guidance_scale": guidance_scale,
+ "generator": generator,
  }
 
+ # Add negative prompt for quality
  if negative_prompt.strip():
  gen_kwargs["negative_prompt"] = negative_prompt
+ else:
+ # Default negative prompt for premium quality
+ default_negative = "blurry, low quality, distorted, pixelated, compression artifacts, watermark, text, signature, amateur, static, boring"
+ gen_kwargs["negative_prompt"] = default_negative
+ log_loading(f"🚫 Using default negative prompt for quality")
 
+ # Model-specific parameters
+ if MODEL_INFO["name"].startswith("CogVideoX"):
  gen_kwargs["num_videos_per_prompt"] = 1
+ log_loading(f"🎥 CogVideoX generation starting...")
 
+ # Generate with progress
+ log_loading(f"🚀 H200 generation in progress...")
  result = MODEL(**gen_kwargs)
 
  end_time = time.time()
 
  # Extract video frames
  if hasattr(result, 'frames'):
  video_frames = result.frames[0]
+ log_loading(f"📹 Extracted {len(video_frames)} frames")
  elif hasattr(result, 'videos'):
  video_frames = result.videos[0]
+ log_loading(f"📹 Extracted video tensor: {video_frames.shape}")
  else:
+ log_loading(f"❌ Unknown result format: {type(result)}")
+ return None, "❌ Could not extract video frames from result"
+
+ # Export with proper FPS
+ target_fps = MODEL_INFO["fps"]
+ actual_duration = num_frames / target_fps
 
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
  from diffusers.utils import export_to_video
+ export_to_video(video_frames, tmp_file.name, fps=target_fps)
  video_path = tmp_file.name
+ log_loading(f"🎬 Exported to {tmp_file.name} @ {target_fps} fps")
 
  # Memory stats
  end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
  memory_used = end_memory - start_memory
 
+ # Success report
+ success_msg = f"""🎯 **PREMIUM H200 VIDEO GENERATED**
 
+ 🤖 **Model:** {MODEL_INFO['name']}
  📝 **Prompt:** {prompt}
+ 🎬 **Video:** {num_frames} frames @ {target_fps} fps = **{actual_duration:.1f} seconds**
  📏 **Resolution:** {width}x{height}
+ ⚙️ **Quality:** {num_inference_steps} inference steps
+ 🎯 **Guidance:** {guidance_scale}
  🎲 **Seed:** {seed}
+ ⏱️ **Generation Time:** {generation_time:.1f}s ({generation_time/60:.1f} minutes)
  🖥️ **Device:** H200 MIG (69.5GB)
  💾 **Memory Used:** {memory_used:.1f}GB
+ 📋 **Model Notes:** {MODEL_INFO['description']}
+
+ **🎥 Video Quality:** Premium quality with {num_frames} frames over {actual_duration:.1f} seconds"""
 
+ log_loading(f"✅ PREMIUM generation completed: {actual_duration:.1f}s video in {generation_time:.1f}s")
 
  return video_path, success_msg
 
  except torch.cuda.OutOfMemoryError:
+ if HAS_CUDA:
+ torch.cuda.empty_cache()
  gc.collect()
+ return None, "❌ H200 memory exceeded. Try reducing frames or resolution."
 
  except Exception as e:
  if HAS_CUDA:
  torch.cuda.empty_cache()
  gc.collect()
+ error_msg = str(e)
+ log_loading(f"❌ Generation error: {error_msg}")
+ return None, f"❌ Premium generation failed: {error_msg}"
+
+ def get_model_status():
+ """Get current premium model status"""
+ if MODEL is None:
+ return "⏳ **No premium model loaded** - will auto-load on generation"
+
+ fps = MODEL_INFO["fps"]
+ max_frames = MODEL_INFO["max_frames"]
+ max_duration = max_frames / fps
+ resolutions = ", ".join([f"{w}x{h}" for w, h in MODEL_INFO["resolution_options"]])
+
+ return f"""🎯 **{MODEL_INFO['name']} Ready**
+
+ **📋 Premium Capabilities:**
+ - **Max Duration:** {max_duration:.1f} seconds ({max_frames} frames @ {fps} fps)
+ - **Resolutions:** {resolutions}
+ - **Quality:** {MODEL_INFO['description']}
+
+ **⚡ H200 Optimizations:**
+ - Full model in GPU memory
+ - Memory efficient attention
+ - VAE optimizations enabled
+
+ **💡 This model produces {max_duration:.1f} second videos with {max_frames} frames!**"""
 
  def get_loading_logs():
+ """Get formatted loading logs"""
  global LOADING_LOGS
  if not LOADING_LOGS:
  return "No loading attempts yet."
  return "\n".join(LOADING_LOGS)
 
+ def suggest_premium_settings():
+ """Suggest optimal settings for current model"""
+ if MODEL is None:
+ return "Load a premium model first."
 
+ model_name = MODEL_INFO['name']
+ max_frames = MODEL_INFO['max_frames']
+ fps = MODEL_INFO['fps']
+ max_duration = max_frames / fps
+
+ return f"""## 🎯 Optimal Settings for {model_name}
 
+ **🚀 Maximum Quality:**
+ - Frames: {max_frames} (full {max_duration:.1f} second video)
+ - Inference Steps: 50+
+ - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
+ - Resolution: {MODEL_INFO['resolution_options'][-1]}
 
+ **⚖️ Balanced (Recommended):**
+ - Frames: {max_frames//2} ({max_frames//2/fps:.1f} second video)
+ - Inference Steps: 35-50
+ - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
 
+ **⚡ Fast Test:**
+ - Frames: 25 ({25/fps:.1f} second video)
+ - Inference Steps: 30
+ - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
 
+ **📝 Premium Prompting Tips:**
+ - Be very specific and detailed
+ - Include camera movements: "slow zoom", "tracking shot"
+ - Describe lighting: "golden hour", "cinematic lighting"
+ - Add style: "professional cinematography", "8K quality"
+ - Mention motion: "smooth movement", "graceful motion"
+
+ **Example Premium Prompt:**
+ "A majestic golden eagle soaring gracefully through misty mountain peaks during golden hour, cinematic tracking shot with shallow depth of field, professional wildlife cinematography, smooth gliding motion, warm sunset lighting, 8K quality"
+
+ Remember: Longer videos need more detailed prompts to maintain coherence!"""
 
+ # Create premium interface
+ with gr.Blocks(title="H200 Premium Video Generator", theme=gr.themes.Glass()) as demo:
 
  gr.Markdown("""
+ # 🎯 H200 Premium Video Generator
 
+ **Premium Models Only** • **Long-Form Videos** • **Professional Quality**
+
+ *CogVideoX-5B • LTX-Video • No Low-Quality Fallbacks*
  """)
 
+ # Premium status
  with gr.Row():
  gr.Markdown("""
+ <div style="background: linear-gradient(45deg, #FFD700, #FF6B6B); padding: 15px; border-radius: 15px; text-align: center; color: white; font-weight: bold; font-size: 18px;">
+ 🏆 PREMIUM MODE - H200 MIG 69.5GB - QUALITY PRIORITY 🏆
  </div>
  """)
 
+ with gr.Tab("🎬 Premium Generation"):
  with gr.Row():
  with gr.Column(scale=1):
  prompt_input = gr.Textbox(
+ label="📝 Detailed Video Prompt (Premium Quality)",
+ placeholder="A breathtaking aerial view of a majestic golden eagle soaring gracefully through dramatic mountain peaks shrouded in morning mist, cinematic wildlife documentary style with slow motion tracking shot, professional cinematography with warm golden hour lighting and shallow depth of field, smooth gliding motion across epic landscape, 8K quality with film grain texture...",
+ lines=5,
+ max_lines=8
  )
 
  negative_prompt_input = gr.Textbox(
+ label="🚫 Negative Prompt (Optional - auto-applied for quality)",
+ placeholder="blurry, low quality, distorted, pixelated, compression artifacts, watermark, text, signature, amateur, static, boring, jerky motion...",
  lines=2
  )
 
+ with gr.Accordion("🎯 Premium Settings", open=True):
  with gr.Row():
+ num_frames = gr.Slider(
+ minimum=16,
+ maximum=49,
+ value=49,
+ step=1,
+ label="🎬 Video Frames (16 = 2s, 49 = 6s+)"
+ )
+
+ resolution = gr.Dropdown(
+ choices=["720x480", "480x720"],
+ value="720x480",
+ label="📏 Resolution"
+ )
 
  with gr.Row():
+ num_steps = gr.Slider(
+ minimum=30,
+ maximum=100,
+ value=50,
+ step=5,
+ label="⚙️ Inference Steps (50+ for premium quality)"
+ )
+
+ guidance_scale = gr.Slider(
+ minimum=4.0,
+ maximum=10.0,
+ value=6.0,
+ step=0.5,
+ label="🎯 Guidance Scale"
+ )
+
+ seed = gr.Number(
+ label="🎲 Seed (-1 for random)",
+ value=-1,
+ precision=0
+ )
 
+ generate_btn = gr.Button(
+ "🎯 Generate Premium Video",
+ variant="primary",
+ size="lg"
+ )
 
  gr.Markdown("""
+ **⏱️ Premium Generation:** 2-5 minutes for quality
+
+ **🎥 Output:** 2-6+ second high-quality videos
 
+ **💡 Premium Tips:**
+ - Use very detailed, specific prompts
+ - Higher inference steps = better quality
+ - Longer videos need more descriptive prompts
  """)
 
  with gr.Column(scale=1):
+ video_output = gr.Video(
+ label="🎥 Premium H200 Generated Video",
+ height=400
+ )
+
+ result_text = gr.Textbox(
+ label="📋 Premium Generation Report",
+ lines=12,
+ show_copy_button=True
+ )
 
+ # Generate button
  generate_btn.click(
+ fn=generate_premium_video,
+ inputs=[
+ prompt_input, negative_prompt_input, num_frames,
+ resolution, num_steps, guidance_scale, seed
+ ],
  outputs=[video_output, result_text]
  )
 
+ # Premium examples
  gr.Examples(
  examples=[
  [
+ "A majestic golden eagle soaring gracefully through misty mountain peaks during golden hour, cinematic wildlife documentary style with slow motion tracking shot, professional cinematography with warm lighting and shallow depth of field, smooth gliding motion, 8K quality",
+ "blurry, low quality, static, amateur, pixelated",
+ 49, "720x480", 50, 6.0, 42
  ],
  [
+ "Ocean waves crashing against dramatic coastal cliffs during a storm, professional seascape cinematography with dynamic camera movement, slow motion water spray and foam, dramatic lighting with storm clouds, high contrast and deep blues, cinematic quality",
+ "calm, peaceful, low quality, static, boring",
+ 41, "720x480", 60, 6.5, 123
  ],
  [
+ "A steaming artisanal coffee cup on rustic wooden table by rain-streaked window, cozy cafe atmosphere with warm ambient lighting, shallow depth of field with bokeh background, steam rising elegantly, professional commercial cinematography, intimate close-up shot",
+ "cold, harsh lighting, plastic, fake, low quality, distorted",
+ 33, "720x480", 45, 6.0, 456
  ],
  [
+ "Time-lapse of cherry blossom petals falling like snow in traditional Japanese garden with wooden bridge over koi pond, peaceful zen atmosphere with soft natural lighting, seasonal transition captured in cinematic wide shot, perfect composition and color grading",
+ "modern, urban, chaotic, low quality, static, artificial",
+ 49, "720x480", 55, 6.5, 789
  ]
  ],
+ inputs=[prompt_input, negative_prompt_input, num_frames, resolution, num_steps, guidance_scale, seed]
  )
 
+ with gr.Tab("🎯 Premium Status"):
  with gr.Row():
+ status_btn = gr.Button("🔍 Model Status", variant="secondary")
+ logs_btn = gr.Button("📋 Loading Logs", variant="secondary")
+ settings_btn = gr.Button("⚙️ Optimal Settings", variant="secondary")
 
  status_output = gr.Markdown()
+ logs_output = gr.Textbox(label="Detailed Logs", lines=12, show_copy_button=True)
+ settings_output = gr.Markdown()
 
+ status_btn.click(fn=get_model_status, outputs=status_output)
  logs_btn.click(fn=get_loading_logs, outputs=logs_output)
+ settings_btn.click(fn=suggest_premium_settings, outputs=settings_output)
 
  # Auto-load status
+ demo.load(fn=get_model_status, outputs=status_output)
 
  if __name__ == "__main__":
+ demo.queue(max_size=2) # Premium quality needs smaller queue
  demo.launch(
  share=False,
  server_name="0.0.0.0",