ozilion committed on
Commit
4dcdb86
·
verified ·
1 Parent(s): c40d82c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -246
app.py CHANGED
@@ -24,42 +24,42 @@ IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"
24
  IS_SPACES = os.environ.get("SPACE_ID") is not None
25
  HAS_CUDA = torch.cuda.is_available()
26
 
27
- print(f"πŸš€ H200 Premium Setup: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
28
 
29
- # PREMIUM MODELS ONLY - No low quality fallbacks
30
- PREMIUM_MODELS = [
31
  {
32
- "id": "THUDM/CogVideoX-5b",
33
- "name": "CogVideoX-5B",
34
  "pipeline_class": "CogVideoXPipeline",
35
- "resolution_options": [(720, 480), (480, 720)],
36
  "max_frames": 49,
37
  "dtype": torch.bfloat16,
38
  "fps": 8,
39
  "priority": 1,
40
- "description": "5B parameter video model - high quality"
41
  },
42
  {
43
- "id": "THUDM/CogVideoX-2b",
44
- "name": "CogVideoX-2B",
45
  "pipeline_class": "CogVideoXPipeline",
46
- "resolution_options": [(720, 480), (480, 720)],
47
  "max_frames": 49,
48
  "dtype": torch.bfloat16,
49
  "fps": 8,
50
  "priority": 2,
51
- "description": "2B parameter model - faster generation"
52
  },
53
  {
54
- "id": "Lightricks/LTX-Video",
55
- "name": "LTX-Video",
56
  "pipeline_class": "DiffusionPipeline",
57
- "resolution_options": [(512, 512), (768, 768)],
58
- "max_frames": 121, # LTX supports longer videos
59
- "dtype": torch.bfloat16,
60
- "fps": 24, # Higher FPS
61
  "priority": 3,
62
- "description": "Professional video generation model"
63
  }
64
  ]
65
 
@@ -77,115 +77,121 @@ def log_loading(message):
77
  LOADING_LOGS.append(formatted_msg)
78
 
79
  def get_h200_memory():
80
- """Get detailed H200 memory stats"""
81
  if HAS_CUDA:
82
  try:
83
  total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
84
  allocated = torch.cuda.memory_allocated(0) / (1024**3)
85
- reserved = torch.cuda.memory_reserved(0) / (1024**3)
86
- return total, allocated, reserved
87
  except:
88
- return 0, 0, 0
89
- return 0, 0, 0
90
 
91
- def load_premium_model():
92
- """Load premium models only - no fallbacks"""
93
  global MODEL, MODEL_INFO, LOADING_LOGS
94
 
95
  if MODEL is not None:
96
  return True
97
 
98
  LOADING_LOGS = []
99
- log_loading("🎯 H200 Premium Model Loading - QUALITY PRIORITY")
100
 
101
- total_mem, allocated_mem, reserved_mem = get_h200_memory()
102
- log_loading(f"πŸ’Ύ H200 Memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved")
103
 
104
- # Sort by priority (premium first)
105
- sorted_models = sorted(PREMIUM_MODELS, key=lambda x: x["priority"])
106
 
107
  for model_config in sorted_models:
108
- if try_load_premium_model(model_config):
109
  return True
110
 
111
- log_loading("❌ All premium models failed - check model availability")
112
  return False
113
 
114
- def try_load_premium_model(config):
115
- """Try loading premium model with optimized settings"""
116
  global MODEL, MODEL_INFO
117
 
118
  model_id = config["id"]
119
  model_name = config["name"]
120
 
121
- log_loading(f"πŸ”„ Loading {model_name} (Premium)...")
122
- log_loading(f" πŸ“‹ Target: {config['pipeline_class']}, {config['max_frames']} frames, {config['fps']} fps")
 
123
 
124
  try:
125
- # Clear H200 memory
126
  if HAS_CUDA:
127
  torch.cuda.empty_cache()
128
  torch.cuda.synchronize()
129
  gc.collect()
130
 
131
- # Import specific pipeline
 
 
132
  if config["pipeline_class"] == "CogVideoXPipeline":
133
- from diffusers import CogVideoXPipeline
134
- PipelineClass = CogVideoXPipeline
135
- log_loading(f" πŸ“₯ Using CogVideoXPipeline...")
 
 
 
 
136
  else:
137
- from diffusers import DiffusionPipeline
138
  PipelineClass = DiffusionPipeline
139
- log_loading(f" πŸ“₯ Using DiffusionPipeline...")
 
 
 
 
140
 
141
- # Load with premium settings
142
- log_loading(f" πŸ”„ Downloading/Loading model...")
143
  pipe = PipelineClass.from_pretrained(
144
  model_id,
145
  torch_dtype=config["dtype"],
146
- trust_remote_code=True,
147
- # No variant, no use_safetensors restrictions
148
  )
149
 
150
- # Move to H200 and optimize
 
 
 
151
  if HAS_CUDA:
152
  log_loading(f" πŸ“± Moving to H200 CUDA...")
153
  pipe = pipe.to("cuda")
154
-
155
- # Premium optimizations for H200's 69.5GB
156
- if hasattr(pipe, 'enable_vae_slicing'):
157
- pipe.enable_vae_slicing()
158
- log_loading(f" ⚑ VAE slicing enabled")
159
-
160
- if hasattr(pipe, 'enable_vae_tiling'):
161
- pipe.enable_vae_tiling()
162
- log_loading(f" ⚑ VAE tiling enabled")
163
-
164
- if hasattr(pipe, 'enable_memory_efficient_attention'):
165
- pipe.enable_memory_efficient_attention()
166
- log_loading(f" ⚑ Memory efficient attention enabled")
167
-
168
- # For H200's large memory, keep everything in GPU
169
- log_loading(f" πŸš€ Keeping full model in H200 GPU memory")
170
 
171
- # Memory check after loading
172
- total_mem, allocated_mem, reserved_mem = get_h200_memory()
173
- log_loading(f" πŸ’Ύ Post-load: {allocated_mem:.1f}GB allocated, {reserved_mem:.1f}GB reserved")
 
174
 
175
- # Validate model capabilities
176
- expected_frames = config["max_frames"]
177
- expected_fps = config["fps"]
178
- log_loading(f" βœ… {model_name} ready: {expected_frames} max frames @ {expected_fps} fps")
 
 
 
 
 
 
 
179
 
180
  MODEL = pipe
181
  MODEL_INFO = config
182
 
183
- log_loading(f"🎯 SUCCESS: {model_name} loaded for premium generation!")
 
 
184
  return True
185
 
186
  except Exception as e:
187
  log_loading(f"❌ {model_name} failed: {str(e)}")
188
- # Clear memory thoroughly
189
  if HAS_CUDA:
190
  torch.cuda.empty_cache()
191
  torch.cuda.synchronize()
@@ -193,77 +199,63 @@ def try_load_premium_model(config):
193
  return False
194
 
195
  @spaces.GPU(duration=300) if SPACES_AVAILABLE else lambda x: x
196
- def generate_premium_video(
197
  prompt: str,
198
  negative_prompt: str = "",
199
  num_frames: int = 49,
200
- resolution: str = "720x480",
201
  num_inference_steps: int = 50,
202
  guidance_scale: float = 6.0,
203
  seed: int = -1
204
  ) -> Tuple[Optional[str], str]:
205
- """Generate premium quality video with proper parameters"""
206
 
207
  global MODEL, MODEL_INFO
208
 
209
- # Load premium model
210
- if not load_premium_model():
211
- logs = "\n".join(LOADING_LOGS[-5:])
212
- return None, f"❌ No premium models available\n\nLogs:\n{logs}"
213
 
214
  # Input validation
215
  if not prompt.strip():
216
- return None, "❌ Please enter a detailed prompt for premium generation."
217
-
218
- if len(prompt) < 10:
219
- return None, "❌ Please provide a more detailed prompt (minimum 10 characters)."
220
 
221
- # Parse resolution
222
- try:
223
- width, height = map(int, resolution.split('x'))
224
- except:
225
- width, height = MODEL_INFO["resolution_options"][0]
226
-
227
- # Validate resolution
228
- if (width, height) not in MODEL_INFO["resolution_options"]:
229
- width, height = MODEL_INFO["resolution_options"][0]
230
- log_loading(f"⚠️ Resolution adjusted to {width}x{height}")
231
 
232
- # Validate frames
233
  max_frames = MODEL_INFO["max_frames"]
234
- num_frames = min(max(num_frames, 16), max_frames) # Minimum 16 for quality
 
 
 
 
235
 
236
- # Model-specific parameter optimization
237
  if MODEL_INFO["name"].startswith("CogVideoX"):
238
- # CogVideoX optimal parameters
239
- guidance_scale = max(6.0, min(guidance_scale, 7.0)) # CogVideoX sweet spot
240
- num_inference_steps = max(50, num_inference_steps) # Higher steps for quality
241
- elif MODEL_INFO["name"] == "LTX-Video":
242
- # LTX-Video optimal parameters
243
- guidance_scale = max(7.0, min(guidance_scale, 8.5)) # LTX sweet spot
244
- num_inference_steps = max(30, num_inference_steps)
245
 
246
  try:
247
  # H200 memory preparation
248
  start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
249
 
250
- # Enhanced seed handling
251
  if seed == -1:
252
  seed = np.random.randint(0, 2**32 - 1)
253
 
254
  device = "cuda" if HAS_CUDA else "cpu"
255
  generator = torch.Generator(device=device).manual_seed(seed)
256
 
257
- log_loading(f"🎬 PREMIUM GENERATION START")
258
- log_loading(f"πŸ“‹ Model: {MODEL_INFO['name']}")
259
- log_loading(f"πŸ“ Resolution: {width}x{height}")
260
- log_loading(f"🎞️ Frames: {num_frames} @ {MODEL_INFO['fps']} fps = {num_frames/MODEL_INFO['fps']:.1f}s video")
261
- log_loading(f"βš™οΈ Steps: {num_inference_steps}, Guidance: {guidance_scale}")
262
- log_loading(f"πŸ“ Prompt: {prompt[:100]}...")
263
 
264
  start_time = time.time()
265
 
266
- # Premium generation with optimal autocast
267
  with torch.autocast(device, dtype=MODEL_INFO["dtype"], enabled=HAS_CUDA):
268
 
269
  # Prepare generation parameters
@@ -277,70 +269,69 @@ def generate_premium_video(
277
  "generator": generator,
278
  }
279
 
280
- # Add negative prompt for quality
281
  if negative_prompt.strip():
282
  gen_kwargs["negative_prompt"] = negative_prompt
283
  else:
284
- # Default negative prompt for premium quality
285
- default_negative = "blurry, low quality, distorted, pixelated, compression artifacts, watermark, text, signature, amateur, static, boring"
286
- gen_kwargs["negative_prompt"] = default_negative
287
- log_loading(f"🚫 Using default negative prompt for quality")
288
 
289
- # Model-specific parameters
290
  if MODEL_INFO["name"].startswith("CogVideoX"):
291
  gen_kwargs["num_videos_per_prompt"] = 1
292
  log_loading(f"πŸŽ₯ CogVideoX generation starting...")
293
 
294
- # Generate with progress
295
  log_loading(f"πŸš€ H200 generation in progress...")
296
  result = MODEL(**gen_kwargs)
297
 
298
  end_time = time.time()
299
  generation_time = end_time - start_time
300
 
301
- # Extract video frames
302
  if hasattr(result, 'frames'):
303
  video_frames = result.frames[0]
304
  log_loading(f"πŸ“Ή Extracted {len(video_frames)} frames")
305
  elif hasattr(result, 'videos'):
306
  video_frames = result.videos[0]
307
- log_loading(f"πŸ“Ή Extracted video tensor: {video_frames.shape}")
308
  else:
309
- log_loading(f"❌ Unknown result format: {type(result)}")
310
- return None, "❌ Could not extract video frames from result"
311
 
312
- # Export with proper FPS
313
- target_fps = MODEL_INFO["fps"]
314
  actual_duration = num_frames / target_fps
315
 
316
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
317
  from diffusers.utils import export_to_video
318
  export_to_video(video_frames, tmp_file.name, fps=target_fps)
319
  video_path = tmp_file.name
320
- log_loading(f"🎬 Exported to {tmp_file.name} @ {target_fps} fps")
321
 
322
- # Memory stats
323
  end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
324
  memory_used = end_memory - start_memory
325
 
326
  # Success report
327
- success_msg = f"""🎯 **PREMIUM H200 VIDEO GENERATED**
328
 
329
- πŸ€– **Model:** {MODEL_INFO['name']}
330
  πŸ“ **Prompt:** {prompt}
331
  🎬 **Video:** {num_frames} frames @ {target_fps} fps = **{actual_duration:.1f} seconds**
332
  πŸ“ **Resolution:** {width}x{height}
333
  βš™οΈ **Quality:** {num_inference_steps} inference steps
334
  🎯 **Guidance:** {guidance_scale}
335
  🎲 **Seed:** {seed}
336
- ⏱️ **Generation Time:** {generation_time:.1f}s ({generation_time/60:.1f} minutes)
337
  πŸ–₯️ **Device:** H200 MIG (69.5GB)
338
  πŸ’Ύ **Memory Used:** {memory_used:.1f}GB
339
- πŸ“‹ **Model Notes:** {MODEL_INFO['description']}
340
 
341
- **πŸŽ₯ Video Quality:** Premium quality with {num_frames} frames over {actual_duration:.1f} seconds"""
342
 
343
- log_loading(f"βœ… PREMIUM generation completed: {actual_duration:.1f}s video in {generation_time:.1f}s")
344
 
345
  return video_path, success_msg
346
 
@@ -348,7 +339,7 @@ def generate_premium_video(
348
  if HAS_CUDA:
349
  torch.cuda.empty_cache()
350
  gc.collect()
351
- return None, "❌ H200 memory exceeded. Try reducing frames or resolution."
352
 
353
  except Exception as e:
354
  if HAS_CUDA:
@@ -356,238 +347,227 @@ def generate_premium_video(
356
  gc.collect()
357
  error_msg = str(e)
358
  log_loading(f"❌ Generation error: {error_msg}")
359
- return None, f"❌ Premium generation failed: {error_msg}"
360
 
361
  def get_model_status():
362
- """Get current premium model status"""
363
  if MODEL is None:
364
- return "⏳ **No premium model loaded** - will auto-load on generation"
365
 
366
- fps = MODEL_INFO["fps"]
367
- max_frames = MODEL_INFO["max_frames"]
 
 
368
  max_duration = max_frames / fps
369
- resolutions = ", ".join([f"{w}x{h}" for w, h in MODEL_INFO["resolution_options"]])
370
 
371
- return f"""🎯 **{MODEL_INFO['name']} Ready**
372
 
373
- **πŸ“‹ Premium Capabilities:**
374
- - **Max Duration:** {max_duration:.1f} seconds ({max_frames} frames @ {fps} fps)
375
- - **Resolutions:** {resolutions}
376
- - **Quality:** {MODEL_INFO['description']}
377
 
378
- **⚑ H200 Optimizations:**
379
- - Full model in GPU memory
380
- - Memory efficient attention
381
- - VAE optimizations enabled
382
 
383
- **πŸ’‘ This model produces {max_duration:.1f} second videos with {max_frames} frames!**"""
384
 
385
  def get_loading_logs():
386
  """Get formatted loading logs"""
387
  global LOADING_LOGS
388
  if not LOADING_LOGS:
389
- return "No loading attempts yet."
390
  return "\n".join(LOADING_LOGS)
391
 
392
- def suggest_premium_settings():
393
- """Suggest optimal settings for current model"""
394
  if MODEL is None:
395
- return "Load a premium model first."
396
 
397
- model_name = MODEL_INFO['name']
398
  max_frames = MODEL_INFO['max_frames']
399
  fps = MODEL_INFO['fps']
400
  max_duration = max_frames / fps
401
 
402
- return f"""## 🎯 Optimal Settings for {model_name}
403
 
404
- **πŸš€ Maximum Quality:**
405
  - Frames: {max_frames} (full {max_duration:.1f} second video)
406
- - Inference Steps: 50+
407
- - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
408
- - Resolution: {MODEL_INFO['resolution_options'][-1]}
409
 
410
- **βš–οΈ Balanced (Recommended):**
411
- - Frames: {max_frames//2} ({max_frames//2/fps:.1f} second video)
412
- - Inference Steps: 35-50
413
- - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
 
414
 
415
- **⚑ Fast Test:**
416
  - Frames: 25 ({25/fps:.1f} second video)
417
- - Inference Steps: 30
418
- - Guidance Scale: {6.0 if 'CogVideo' in model_name else 7.5}
 
419
 
420
- **πŸ“ Premium Prompting Tips:**
421
  - Be very specific and detailed
422
- - Include camera movements: "slow zoom", "tracking shot"
423
- - Describe lighting: "golden hour", "cinematic lighting"
424
- - Add style: "professional cinematography", "8K quality"
425
- - Mention motion: "smooth movement", "graceful motion"
426
 
427
- **Example Premium Prompt:**
428
- "A majestic golden eagle soaring gracefully through misty mountain peaks during golden hour, cinematic tracking shot with shallow depth of field, professional wildlife cinematography, smooth gliding motion, warm sunset lighting, 8K quality"
429
 
430
- Remember: Longer videos need more detailed prompts to maintain coherence!"""
431
 
432
- # Create premium interface
433
- with gr.Blocks(title="H200 Premium Video Generator", theme=gr.themes.Glass()) as demo:
434
 
435
  gr.Markdown("""
436
- # 🎯 H200 Premium Video Generator
437
-
438
- **Premium Models Only** β€’ **Long-Form Videos** β€’ **Professional Quality**
439
 
440
- *CogVideoX-5B β€’ LTX-Video β€’ No Low-Quality Fallbacks*
441
  """)
442
 
443
- # Premium status
444
  with gr.Row():
445
  gr.Markdown("""
446
- <div style="background: linear-gradient(45deg, #FFD700, #FF6B6B); padding: 15px; border-radius: 15px; text-align: center; color: white; font-weight: bold; font-size: 18px;">
447
- πŸ† PREMIUM MODE - H200 MIG 69.5GB - QUALITY PRIORITY πŸ†
448
  </div>
449
  """)
450
 
451
- with gr.Tab("🎬 Premium Generation"):
452
  with gr.Row():
453
  with gr.Column(scale=1):
454
  prompt_input = gr.Textbox(
455
- label="πŸ“ Detailed Video Prompt (Premium Quality)",
456
- placeholder="A breathtaking aerial view of a majestic golden eagle soaring gracefully through dramatic mountain peaks shrouded in morning mist, cinematic wildlife documentary style with slow motion tracking shot, professional cinematography with warm golden hour lighting and shallow depth of field, smooth gliding motion across epic landscape, 8K quality with film grain texture...",
457
- lines=5,
458
- max_lines=8
459
  )
460
 
461
  negative_prompt_input = gr.Textbox(
462
- label="🚫 Negative Prompt (Optional - auto-applied for quality)",
463
- placeholder="blurry, low quality, distorted, pixelated, compression artifacts, watermark, text, signature, amateur, static, boring, jerky motion...",
464
  lines=2
465
  )
466
 
467
- with gr.Accordion("🎯 Premium Settings", open=True):
468
  with gr.Row():
469
  num_frames = gr.Slider(
470
- minimum=16,
471
  maximum=49,
472
  value=49,
473
  step=1,
474
- label="🎬 Video Frames (16 = 2s, 49 = 6s+)"
475
  )
476
 
477
- resolution = gr.Dropdown(
478
- choices=["720x480", "480x720"],
479
- value="720x480",
480
- label="πŸ“ Resolution"
481
- )
482
-
483
- with gr.Row():
484
  num_steps = gr.Slider(
485
  minimum=30,
486
- maximum=100,
487
  value=50,
488
  step=5,
489
- label="βš™οΈ Inference Steps (50+ for premium quality)"
490
  )
491
-
 
492
  guidance_scale = gr.Slider(
493
  minimum=4.0,
494
- maximum=10.0,
495
  value=6.0,
496
  step=0.5,
497
  label="🎯 Guidance Scale"
498
  )
499
-
500
- seed = gr.Number(
501
- label="🎲 Seed (-1 for random)",
502
- value=-1,
503
- precision=0
504
- )
505
 
506
  generate_btn = gr.Button(
507
- "🎯 Generate Premium Video",
508
  variant="primary",
509
  size="lg"
510
  )
511
 
512
  gr.Markdown("""
513
- **⏱️ Premium Generation:** 2-5 minutes for quality
514
-
515
- **πŸŽ₯ Output:** 2-6+ second high-quality videos
516
-
517
- **πŸ’‘ Premium Tips:**
518
- - Use very detailed, specific prompts
519
- - Higher inference steps = better quality
520
- - Longer videos need more descriptive prompts
521
  """)
522
 
523
  with gr.Column(scale=1):
524
  video_output = gr.Video(
525
- label="πŸŽ₯ Premium H200 Generated Video",
526
  height=400
527
  )
528
 
529
  result_text = gr.Textbox(
530
- label="πŸ“‹ Premium Generation Report",
531
- lines=12,
532
  show_copy_button=True
533
  )
534
 
535
  # Generate button
536
  generate_btn.click(
537
- fn=generate_premium_video,
538
  inputs=[
539
  prompt_input, negative_prompt_input, num_frames,
540
- resolution, num_steps, guidance_scale, seed
541
  ],
542
  outputs=[video_output, result_text]
543
  )
544
 
545
- # Premium examples
546
  gr.Examples(
547
  examples=[
548
  [
549
- "A majestic golden eagle soaring gracefully through misty mountain peaks during golden hour, cinematic wildlife documentary style with slow motion tracking shot, professional cinematography with warm lighting and shallow depth of field, smooth gliding motion, 8K quality",
550
- "blurry, low quality, static, amateur, pixelated",
551
- 49, "720x480", 50, 6.0, 42
552
  ],
553
  [
554
- "Ocean waves crashing against dramatic coastal cliffs during a storm, professional seascape cinematography with dynamic camera movement, slow motion water spray and foam, dramatic lighting with storm clouds, high contrast and deep blues, cinematic quality",
555
- "calm, peaceful, low quality, static, boring",
556
- 41, "720x480", 60, 6.5, 123
557
  ],
558
  [
559
- "A steaming artisanal coffee cup on rustic wooden table by rain-streaked window, cozy cafe atmosphere with warm ambient lighting, shallow depth of field with bokeh background, steam rising elegantly, professional commercial cinematography, intimate close-up shot",
560
- "cold, harsh lighting, plastic, fake, low quality, distorted",
561
- 33, "720x480", 45, 6.0, 456
562
  ],
563
  [
564
- "Time-lapse of cherry blossom petals falling like snow in traditional Japanese garden with wooden bridge over koi pond, peaceful zen atmosphere with soft natural lighting, seasonal transition captured in cinematic wide shot, perfect composition and color grading",
565
- "modern, urban, chaotic, low quality, static, artificial",
566
- 49, "720x480", 55, 6.5, 789
567
  ]
568
  ],
569
- inputs=[prompt_input, negative_prompt_input, num_frames, resolution, num_steps, guidance_scale, seed]
570
  )
571
 
572
- with gr.Tab("🎯 Premium Status"):
573
  with gr.Row():
574
- status_btn = gr.Button("πŸ” Model Status", variant="secondary")
575
- logs_btn = gr.Button("πŸ“‹ Loading Logs", variant="secondary")
576
- settings_btn = gr.Button("βš™οΈ Optimal Settings", variant="secondary")
577
 
578
  status_output = gr.Markdown()
579
- logs_output = gr.Textbox(label="Detailed Logs", lines=12, show_copy_button=True)
580
  settings_output = gr.Markdown()
581
 
582
  status_btn.click(fn=get_model_status, outputs=status_output)
583
  logs_btn.click(fn=get_loading_logs, outputs=logs_output)
584
- settings_btn.click(fn=suggest_premium_settings, outputs=settings_output)
585
 
586
  # Auto-load status
587
  demo.load(fn=get_model_status, outputs=status_output)
588
 
589
  if __name__ == "__main__":
590
- demo.queue(max_size=2) # Premium quality needs smaller queue
591
  demo.launch(
592
  share=False,
593
  server_name="0.0.0.0",
 
24
  IS_SPACES = os.environ.get("SPACE_ID") is not None
25
  HAS_CUDA = torch.cuda.is_available()
26
 
27
+ print(f"πŸš€ H200 CogVideoX Setup: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")
28
 
29
+ # WORKING MODELS - Tested and confirmed
30
+ WORKING_MODELS = [
31
  {
32
+ "id": "THUDM/CogVideoX-2b",
33
+ "name": "CogVideoX-2B",
34
  "pipeline_class": "CogVideoXPipeline",
35
+ "resolution": (720, 480),
36
  "max_frames": 49,
37
  "dtype": torch.bfloat16,
38
  "fps": 8,
39
  "priority": 1,
40
+ "description": "2B parameter model - fast and high quality"
41
  },
42
  {
43
+ "id": "THUDM/CogVideoX-5b",
44
+ "name": "CogVideoX-5B",
45
  "pipeline_class": "CogVideoXPipeline",
46
+ "resolution": (720, 480),
47
  "max_frames": 49,
48
  "dtype": torch.bfloat16,
49
  "fps": 8,
50
  "priority": 2,
51
+ "description": "5B parameter model - maximum quality"
52
  },
53
  {
54
+ "id": "damo-vilab/text-to-video-ms-1.7b",
55
+ "name": "ModelScope T2V 1.7B",
56
  "pipeline_class": "DiffusionPipeline",
57
+ "resolution": (256, 256),
58
+ "max_frames": 16,
59
+ "dtype": torch.float16,
60
+ "fps": 8,
61
  "priority": 3,
62
+ "description": "Reliable fallback model"
63
  }
64
  ]
65
 
 
77
  LOADING_LOGS.append(formatted_msg)
78
 
79
  def get_h200_memory():
80
+ """Get H200 memory stats"""
81
  if HAS_CUDA:
82
  try:
83
  total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
84
  allocated = torch.cuda.memory_allocated(0) / (1024**3)
85
+ return total, allocated
 
86
  except:
87
+ return 0, 0
88
+ return 0, 0
89
 
90
+ def load_working_model():
91
+ """Load first working model - CogVideoX priority"""
92
  global MODEL, MODEL_INFO, LOADING_LOGS
93
 
94
  if MODEL is not None:
95
  return True
96
 
97
  LOADING_LOGS = []
98
+ log_loading("🎯 H200 Working Model Loading - CogVideoX Priority")
99
 
100
+ total_mem, allocated_mem = get_h200_memory()
101
+ log_loading(f"πŸ’Ύ H200 Memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB allocated")
102
 
103
+ # Try models in priority order
104
+ sorted_models = sorted(WORKING_MODELS, key=lambda x: x["priority"])
105
 
106
  for model_config in sorted_models:
107
+ if try_load_working_model(model_config):
108
  return True
109
 
110
+ log_loading("❌ All working models failed")
111
  return False
112
 
113
+ def try_load_working_model(config):
114
+ """Try loading a specific working model"""
115
  global MODEL, MODEL_INFO
116
 
117
  model_id = config["id"]
118
  model_name = config["name"]
119
 
120
+ log_loading(f"πŸ”„ Loading {model_name}...")
121
+ log_loading(f" πŸ“‹ Config: {model_id}")
122
+ log_loading(f" 🎯 Target: {config['max_frames']} frames, {config['fps']} fps, {config['resolution']}")
123
 
124
  try:
125
+ # Clear H200 memory first
126
  if HAS_CUDA:
127
  torch.cuda.empty_cache()
128
  torch.cuda.synchronize()
129
  gc.collect()
130
 
131
+ log_loading(f" οΏ½οΏ½ Memory cleared")
132
+
133
+ # Import appropriate pipeline
134
  if config["pipeline_class"] == "CogVideoXPipeline":
135
+ try:
136
+ from diffusers import CogVideoXPipeline
137
+ PipelineClass = CogVideoXPipeline
138
+ log_loading(f" πŸ“₯ Using CogVideoXPipeline")
139
+ except ImportError as e:
140
+ log_loading(f" ❌ CogVideoXPipeline import failed: {e}")
141
+ return False
142
  else:
143
+ from diffusers import DiffusionPipeline
144
  PipelineClass = DiffusionPipeline
145
+ log_loading(f" πŸ“₯ Using DiffusionPipeline")
146
+
147
+ # Load model with minimal parameters
148
+ log_loading(f" πŸ”„ Downloading/Loading {model_name}...")
149
+ start_load = time.time()
150
 
 
 
151
  pipe = PipelineClass.from_pretrained(
152
  model_id,
153
  torch_dtype=config["dtype"],
154
+ trust_remote_code=True
 
155
  )
156
 
157
+ load_time = time.time() - start_load
158
+ log_loading(f" βœ… Model loaded in {load_time:.1f}s")
159
+
160
+ # Move to H200 GPU
161
  if HAS_CUDA:
162
  log_loading(f" πŸ“± Moving to H200 CUDA...")
163
  pipe = pipe.to("cuda")
164
+ torch.cuda.synchronize()
165
+ log_loading(f" βœ… Model on H200 GPU")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ # H200 optimizations
168
+ if hasattr(pipe, 'enable_vae_slicing'):
169
+ pipe.enable_vae_slicing()
170
+ log_loading(f" ⚑ VAE slicing enabled")
171
 
172
+ if hasattr(pipe, 'enable_vae_tiling'):
173
+ pipe.enable_vae_tiling()
174
+ log_loading(f" ⚑ VAE tiling enabled")
175
+
176
+ if hasattr(pipe, 'enable_memory_efficient_attention'):
177
+ pipe.enable_memory_efficient_attention()
178
+ log_loading(f" ⚑ Memory efficient attention enabled")
179
+
180
+ # Memory check after setup
181
+ total_mem, allocated_mem = get_h200_memory()
182
+ log_loading(f" πŸ’Ύ Final memory: {allocated_mem:.1f}GB / {total_mem:.1f}GB")
183
 
184
  MODEL = pipe
185
  MODEL_INFO = config
186
 
187
+ log_loading(f"🎯 SUCCESS: {model_name} ready for generation!")
188
+ log_loading(f"πŸ“Š Capabilities: {config['max_frames']} frames @ {config['fps']} fps = {config['max_frames']/config['fps']:.1f}s videos")
189
+
190
  return True
191
 
192
  except Exception as e:
193
  log_loading(f"❌ {model_name} failed: {str(e)}")
194
+ # Thorough cleanup
195
  if HAS_CUDA:
196
  torch.cuda.empty_cache()
197
  torch.cuda.synchronize()
 
199
  return False
200
 
201
  @spaces.GPU(duration=300) if SPACES_AVAILABLE else lambda x: x
202
+ def generate_video(
203
  prompt: str,
204
  negative_prompt: str = "",
205
  num_frames: int = 49,
 
206
  num_inference_steps: int = 50,
207
  guidance_scale: float = 6.0,
208
  seed: int = -1
209
  ) -> Tuple[Optional[str], str]:
210
+ """Generate video with working model"""
211
 
212
  global MODEL, MODEL_INFO
213
 
214
+ # Load working model
215
+ if not load_working_model():
216
+ logs = "\n".join(LOADING_LOGS[-10:])
217
+ return None, f"❌ No working models could be loaded\n\nDetailed Logs:\n{logs}"
218
 
219
  # Input validation
220
  if not prompt.strip():
221
+ return None, "❌ Please enter a detailed prompt."
 
 
 
222
 
223
+ if len(prompt) < 5:
224
+ return None, "❌ Please provide a more descriptive prompt."
 
 
 
 
 
 
 
 
225
 
226
+ # Get model specifications
227
  max_frames = MODEL_INFO["max_frames"]
228
+ width, height = MODEL_INFO["resolution"]
229
+ target_fps = MODEL_INFO["fps"]
230
+
231
+ # Validate and adjust parameters
232
+ num_frames = min(max(num_frames, 8), max_frames)
233
 
234
+ # Model-specific optimizations
235
  if MODEL_INFO["name"].startswith("CogVideoX"):
236
+ # CogVideoX optimal settings
237
+ guidance_scale = max(6.0, min(guidance_scale, 7.0))
238
+ num_inference_steps = max(50, num_inference_steps)
 
 
 
 
239
 
240
  try:
241
  # H200 memory preparation
242
  start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
243
 
244
+ # Seed handling
245
  if seed == -1:
246
  seed = np.random.randint(0, 2**32 - 1)
247
 
248
  device = "cuda" if HAS_CUDA else "cpu"
249
  generator = torch.Generator(device=device).manual_seed(seed)
250
 
251
+ log_loading(f"🎬 GENERATION START - {MODEL_INFO['name']}")
252
+ log_loading(f"πŸ“ Prompt: {prompt[:80]}...")
253
+ log_loading(f"πŸ“ Settings: {width}x{height}, {num_frames} frames, {num_inference_steps} steps")
254
+ log_loading(f"🎯 Expected duration: {num_frames/target_fps:.1f} seconds @ {target_fps} fps")
 
 
255
 
256
  start_time = time.time()
257
 
258
+ # Generate with proper autocast
259
  with torch.autocast(device, dtype=MODEL_INFO["dtype"], enabled=HAS_CUDA):
260
 
261
  # Prepare generation parameters
 
269
  "generator": generator,
270
  }
271
 
272
+ # Enhanced negative prompt for quality
273
  if negative_prompt.strip():
274
  gen_kwargs["negative_prompt"] = negative_prompt
275
  else:
276
+ # Default quality negative prompt
277
+ quality_negative = "blurry, low quality, distorted, pixelated, compression artifacts, static, boring, amateur, watermark, text"
278
+ gen_kwargs["negative_prompt"] = quality_negative
279
+ log_loading(f"🚫 Applied quality negative prompt")
280
 
281
+ # CogVideoX specific parameters
282
  if MODEL_INFO["name"].startswith("CogVideoX"):
283
  gen_kwargs["num_videos_per_prompt"] = 1
284
  log_loading(f"πŸŽ₯ CogVideoX generation starting...")
285
 
286
+ # Generate
287
  log_loading(f"πŸš€ H200 generation in progress...")
288
  result = MODEL(**gen_kwargs)
289
 
290
  end_time = time.time()
291
  generation_time = end_time - start_time
292
 
293
+ # Extract frames
294
  if hasattr(result, 'frames'):
295
  video_frames = result.frames[0]
296
  log_loading(f"πŸ“Ή Extracted {len(video_frames)} frames")
297
  elif hasattr(result, 'videos'):
298
  video_frames = result.videos[0]
299
+ log_loading(f"πŸ“Ή Extracted video tensor")
300
  else:
301
+ log_loading(f"❌ Unknown result format")
302
+ return None, "❌ Could not extract video frames"
303
 
304
+ # Export with correct FPS
 
305
  actual_duration = num_frames / target_fps
306
 
307
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
308
  from diffusers.utils import export_to_video
309
  export_to_video(video_frames, tmp_file.name, fps=target_fps)
310
  video_path = tmp_file.name
311
+ log_loading(f"🎬 Exported: {actual_duration:.1f}s video @ {target_fps} fps")
312
 
313
+ # Memory usage
314
  end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
315
  memory_used = end_memory - start_memory
316
 
317
  # Success report
318
+ success_msg = f"""🎯 **H200 VIDEO GENERATED SUCCESSFULLY**
319
 
320
+ πŸ€– **Model:** {MODEL_INFO['name']}
321
  πŸ“ **Prompt:** {prompt}
322
  🎬 **Video:** {num_frames} frames @ {target_fps} fps = **{actual_duration:.1f} seconds**
323
  πŸ“ **Resolution:** {width}x{height}
324
  βš™οΈ **Quality:** {num_inference_steps} inference steps
325
  🎯 **Guidance:** {guidance_scale}
326
  🎲 **Seed:** {seed}
327
+ ⏱️ **Generation Time:** {generation_time:.1f}s ({generation_time/60:.1f} min)
328
  πŸ–₯️ **Device:** H200 MIG (69.5GB)
329
  πŸ’Ύ **Memory Used:** {memory_used:.1f}GB
330
+ πŸ“‹ **Model:** {MODEL_INFO['description']}
331
 
332
+ **πŸŽ₯ Result:** {actual_duration:.1f} second high-quality video!**"""
333
 
334
+ log_loading(f"βœ… SUCCESS: {actual_duration:.1f}s video generated in {generation_time:.1f}s")
335
 
336
  return video_path, success_msg
337
 
 
339
  if HAS_CUDA:
340
  torch.cuda.empty_cache()
341
  gc.collect()
342
+ return None, "❌ H200 memory exceeded. Try reducing frames or steps."
343
 
344
  except Exception as e:
345
  if HAS_CUDA:
 
347
  gc.collect()
348
  error_msg = str(e)
349
  log_loading(f"❌ Generation error: {error_msg}")
350
+ return None, f"❌ Generation failed: {error_msg}"
351
 
352
  def get_model_status():
353
+ """Get current model status"""
354
  if MODEL is None:
355
+ return "⏳ **No model loaded** - will auto-load CogVideoX on first generation"
356
 
357
+ name = MODEL_INFO['name']
358
+ max_frames = MODEL_INFO['max_frames']
359
+ fps = MODEL_INFO['fps']
360
+ width, height = MODEL_INFO['resolution']
361
  max_duration = max_frames / fps
 
362
 
363
+ return f"""🎯 **{name} READY**
364
 
365
+ **πŸ“Š Video Capabilities:**
366
+ - **Maximum Duration:** {max_duration:.1f} seconds ({max_frames} frames @ {fps} fps)
367
+ - **Resolution:** {width}x{height}
368
+ - **Quality Level:** {MODEL_INFO['description']}
369
 
370
+ **⚑ H200 Status:**
371
+ - Model fully loaded in GPU memory
372
+ - All optimizations enabled
373
+ - Ready for {max_duration:.1f} second video generation
374
 
375
+ **πŸ’‘ This model creates {max_duration:.1f} second videos with {max_frames} frames!**"""
376
 
377
  def get_loading_logs():
378
  """Get formatted loading logs"""
379
  global LOADING_LOGS
380
  if not LOADING_LOGS:
381
+ return "No loading logs yet. Click generate to start loading."
382
  return "\n".join(LOADING_LOGS)
383
 
384
def suggest_optimal_settings():
    """Return Markdown with recommended generation settings for the loaded model.

    Reads the module-level ``MODEL`` and ``MODEL_INFO`` globals. Three presets
    (maximum quality, balanced, quick test) are derived from the model's
    ``max_frames`` and ``fps``; prompt-writing tips follow.

    Returns:
        str: Markdown text, or a short placeholder when no model is loaded.
    """
    if MODEL is None:
        return "No model loaded yet. Generate a video to auto-load CogVideoX."

    name = MODEL_INFO['name']
    max_frames = MODEL_INFO['max_frames']
    fps = MODEL_INFO['fps']
    max_duration = max_frames / fps

    half_frames = max_frames // 2
    # Never recommend more frames than the model can actually produce.
    quick_frames = min(25, max_frames)

    return f"""## 🎯 Optimal Settings for {name}

**🏆 Maximum Quality (Recommended):**
- Frames: {max_frames} (full {max_duration:.1f} second video)
- Inference Steps: 50-70
- Guidance Scale: 6.0-6.5
- Expected Time: 3-5 minutes

**⚖️ Balanced Quality:**
- Frames: {half_frames} ({half_frames / fps:.1f} second video)
- Inference Steps: 40-50
- Guidance Scale: 6.0
- Expected Time: 2-3 minutes

**⚡ Quick Test:**
- Frames: {quick_frames} ({quick_frames / fps:.1f} second video)
- Inference Steps: 30-40
- Guidance Scale: 6.0
- Expected Time: 1-2 minutes

**📝 {name} Prompt Tips:**
- Be very specific and detailed
- Describe camera movements: "slow zoom in", "tracking shot", "aerial view"
- Include lighting: "golden hour", "soft lighting", "dramatic shadows"
- Add motion description: "smooth movement", "graceful motion", "flowing"
- Specify style: "cinematic", "professional", "documentary style"

**🏆 Example Premium Prompt:**
"A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot following the bird's smooth flight path, professional wildlife documentary style with warm sunset lighting, breathtaking landscape vista below"

Remember: {name} excels at smooth, natural motion and cinematic quality!"""
425
 
426
# Build the Gradio interface: a "Generate Video" tab wired to generate_video
# and a "Model Status" tab exposing the status/log/settings helpers.
# NOTE(review): container nesting was reconstructed from syntax alone -
# confirm the placement of the Examples panel and event bindings against the
# intended layout. Emoji/bullets in labels were stored as mis-decoded byte
# sequences and have been restored to the intended characters.
with gr.Blocks(title="H200 CogVideoX Generator", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🎯 H200 CogVideoX Video Generator

    **CogVideoX-2B/5B Priority** • **6+ Second Videos** • **H200 MIG Optimized**
    """)

    # Status indicator
    with gr.Row():
        gr.Markdown("""
        <div style="background: linear-gradient(45deg, #4ECDC4, #44A08D); padding: 12px; border-radius: 12px; text-align: center; color: white; font-weight: bold;">
        🚀 H200 MIG 69.5GB - COGVIDEOX READY - 6+ SECOND VIDEOS 🚀
        </div>
        """)

    with gr.Tab("🎬 Generate Video"):
        with gr.Row():
            # Left column: prompts, generation settings and the trigger button.
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="📝 Detailed Video Prompt",
                    placeholder="A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot following the bird's smooth flight path, professional wildlife documentary style with warm sunset lighting, breathtaking landscape vista below...",
                    lines=4
                )

                negative_prompt_input = gr.Textbox(
                    label="🚫 Negative Prompt (Optional)",
                    placeholder="blurry, low quality, distorted, pixelated, static, boring, amateur...",
                    lines=2
                )

                with gr.Accordion("⚙️ Generation Settings", open=True):
                    with gr.Row():
                        num_frames = gr.Slider(
                            minimum=8,
                            maximum=49,
                            value=49,
                            step=1,
                            label="🎬 Frames (49 = 6+ seconds)"
                        )

                        num_steps = gr.Slider(
                            minimum=30,
                            maximum=70,
                            value=50,
                            step=5,
                            label="⚙️ Inference Steps"
                        )

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            minimum=4.0,
                            maximum=8.0,
                            value=6.0,
                            step=0.5,
                            label="🎯 Guidance Scale"
                        )

                        # precision=0 makes the component return an integer.
                        seed = gr.Number(
                            label="🎲 Seed (-1 for random)",
                            value=-1,
                            precision=0
                        )

                generate_btn = gr.Button(
                    "🎯 Generate 6+ Second Video",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("""
                **⏱️ Generation Time:** 2-5 minutes
                **🎥 Output:** 6+ second high-quality videos
                **🤖 Model:** CogVideoX auto-loads first time
                """)

            # Right column: the rendered video and the textual report.
            with gr.Column(scale=1):
                video_output = gr.Video(
                    label="🎥 H200 Generated Video",
                    height=400
                )

                result_text = gr.Textbox(
                    label="📋 Generation Report",
                    lines=10,
                    show_copy_button=True
                )

        # Generate button: input order must match generate_video's parameters.
        generate_btn.click(
            fn=generate_video,
            inputs=[
                prompt_input, negative_prompt_input, num_frames,
                num_steps, guidance_scale, seed
            ],
            outputs=[video_output, result_text]
        )

        # Working examples: each row is
        # [prompt, negative prompt, frames, steps, guidance scale, seed].
        gr.Examples(
            examples=[
                [
                    "A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot, professional wildlife documentary style",
                    "blurry, low quality, static, amateur",
                    49, 50, 6.0, 42
                ],
                [
                    "Ocean waves crashing against rocky coastline during sunset, slow motion cinematography with dramatic lighting and foam spray",
                    "calm, peaceful, low quality, boring",
                    41, 50, 6.5, 123
                ],
                [
                    "A serene mountain lake reflecting autumn trees, gentle camera pan across the water surface, peaceful nature documentary style",
                    "urban, modern, low quality, distorted",
                    33, 45, 6.0, 456
                ],
                [
                    "Steam rising from a hot coffee cup on wooden table by window during rain, cozy atmosphere with warm lighting, intimate close-up shot",
                    "cold, harsh, artificial, low quality",
                    25, 40, 6.0, 789
                ]
            ],
            inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
        )

    with gr.Tab("📊 Model Status"):
        with gr.Row():
            status_btn = gr.Button("🔍 Check Model Status")
            logs_btn = gr.Button("📋 View Loading Logs")
            settings_btn = gr.Button("⚙️ Optimal Settings")

        status_output = gr.Markdown()
        logs_output = gr.Textbox(label="Loading Logs", lines=15, show_copy_button=True)
        settings_output = gr.Markdown()

        status_btn.click(fn=get_model_status, outputs=status_output)
        logs_btn.click(fn=get_loading_logs, outputs=logs_output)
        settings_btn.click(fn=suggest_optimal_settings, outputs=settings_output)

    # Auto-load status: populate the status panel as soon as the page opens.
    demo.load(fn=get_model_status, outputs=status_output)
568
 
569
  if __name__ == "__main__":
570
+ demo.queue(max_size=3)
571
  demo.launch(
572
  share=False,
573
  server_name="0.0.0.0",