Update app.py
app.py
CHANGED
The removed side of this diff is truncated in the Space's file viewer, so most deleted lines are cut off mid-token. What is still recoverable from it:

- The model registry carried a per-model "resolution_options" list rather than the single "resolution" tuple below, and its third entry was an LTX-Video configuration with its own tuning branch (guidance scale clamped to 7.0-8.5, the "LTX sweet spot", and a 30-step minimum).
- get_h200_memory() returned three values (total, allocated, reserved) on success but (0, 0) on its error paths; the new version consistently returns two.
- The loader and generator were named try_load_premium_model() and generate_premium_video(); the latter took a resolution: str = "720x480" parameter, parsed it via width, height = map(int, resolution.split('x')), and fell back to MODEL_INFO["resolution_options"][0] for invalid values.
- Prompt validation required at least 10 characters (now 5), and the layout had a resolution dropdown (choices "720x480" and "480x720") that the new UI drops.

The new side of the diff follows, hunk by hunk; unchanged lines between hunks are omitted as in any diff.
@@ -24,42 +24,42 @@ IS_ZERO_GPU = os.environ.get("SPACES_ZERO_GPU") == "true"

IS_SPACES = os.environ.get("SPACE_ID") is not None
HAS_CUDA = torch.cuda.is_available()

print(f"🚀 H200 CogVideoX Setup: ZeroGPU={IS_ZERO_GPU}, Spaces={IS_SPACES}, CUDA={HAS_CUDA}")

# WORKING MODELS - Tested and confirmed
WORKING_MODELS = [
    {
        "id": "THUDM/CogVideoX-2b",
        "name": "CogVideoX-2B",
        "pipeline_class": "CogVideoXPipeline",
        "resolution": (720, 480),
        "max_frames": 49,
        "dtype": torch.bfloat16,
        "fps": 8,
        "priority": 1,
        "description": "2B parameter model - fast and high quality"
    },
    {
        "id": "THUDM/CogVideoX-5b",
        "name": "CogVideoX-5B",
        "pipeline_class": "CogVideoXPipeline",
        "resolution": (720, 480),
        "max_frames": 49,
        "dtype": torch.bfloat16,
        "fps": 8,
        "priority": 2,
        "description": "5B parameter model - maximum quality"
    },
    {
        "id": "damo-vilab/text-to-video-ms-1.7b",
        "name": "ModelScope T2V 1.7B",
        "pipeline_class": "DiffusionPipeline",
        "resolution": (256, 256),
        "max_frames": 16,
        "dtype": torch.float16,
        "fps": 8,
        "priority": 3,
        "description": "Reliable fallback model"
    }
]
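As a side note, a minimal sketch (not part of the commit) of how the registry's priority, max_frames and fps fields translate into the fallback order and the clip lengths the loader reports:

import torch  # the dtype fields in WORKING_MODELS reference torch types

# Walk the registry the same way load_working_model() does, printing what
# each candidate can produce. Purely illustrative.
for m in sorted(WORKING_MODELS, key=lambda m: m["priority"]):
    w, h = m["resolution"]
    print(f"{m['priority']}. {m['name']}: up to {m['max_frames'] / m['fps']:.1f}s @ {w}x{h}")

# -> 1. CogVideoX-2B: up to 6.1s @ 720x480
# -> 2. CogVideoX-5B: up to 6.1s @ 720x480
# -> 3. ModelScope T2V 1.7B: up to 2.0s @ 256x256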
@@ -77,115 +77,121 @@ def log_loading(message):

    LOADING_LOGS.append(formatted_msg)

def get_h200_memory():
    """Get H200 memory stats"""
    if HAS_CUDA:
        try:
            total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            allocated = torch.cuda.memory_allocated(0) / (1024**3)
            return total, allocated
        except:
            return 0, 0
    return 0, 0
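An aside (a sketch under assumptions, not in the commit): torch also exposes a driver-level view of memory via torch.cuda.mem_get_info(), which can disagree with memory_allocated() on a shared MIG slice because it counts every allocation on the device:

import torch

def get_h200_memory_driver():
    # Driver-level free/total for device 0, converted to GiB. Unlike
    # torch.cuda.memory_allocated(), this also reflects allocations made by
    # other processes sharing the MIG slice.
    if torch.cuda.is_available():
        free_b, total_b = torch.cuda.mem_get_info(0)
        return total_b / 1024**3, (total_b - free_b) / 1024**3
    return 0, 0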
def load_working_model():
    """Load first working model - CogVideoX priority"""
    global MODEL, MODEL_INFO, LOADING_LOGS

    if MODEL is not None:
        return True

    LOADING_LOGS = []
    log_loading("🎯 H200 Working Model Loading - CogVideoX Priority")

    total_mem, allocated_mem = get_h200_memory()
    log_loading(f"💾 H200 Memory: {total_mem:.1f}GB total, {allocated_mem:.1f}GB allocated")

    # Try models in priority order
    sorted_models = sorted(WORKING_MODELS, key=lambda x: x["priority"])

    for model_config in sorted_models:
        if try_load_working_model(model_config):
            return True

    log_loading("❌ All working models failed")
    return False

def try_load_working_model(config):
    """Try loading a specific working model"""
    global MODEL, MODEL_INFO

    model_id = config["id"]
    model_name = config["name"]

    log_loading(f"🔄 Loading {model_name}...")
    log_loading(f"   📋 Config: {model_id}")
    log_loading(f"   🎯 Target: {config['max_frames']} frames, {config['fps']} fps, {config['resolution']}")

    try:
        # Clear H200 memory first
        if HAS_CUDA:
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            gc.collect()

        log_loading(f"   🧹 Memory cleared")

        # Import appropriate pipeline
        if config["pipeline_class"] == "CogVideoXPipeline":
            try:
                from diffusers import CogVideoXPipeline
                PipelineClass = CogVideoXPipeline
                log_loading(f"   📥 Using CogVideoXPipeline")
            except ImportError as e:
                log_loading(f"   ❌ CogVideoXPipeline import failed: {e}")
                return False
        else:
            from diffusers import DiffusionPipeline
            PipelineClass = DiffusionPipeline
            log_loading(f"   📥 Using DiffusionPipeline")

        # Load model with minimal parameters
        log_loading(f"   📂 Downloading/Loading {model_name}...")
        start_load = time.time()

        pipe = PipelineClass.from_pretrained(
            model_id,
            torch_dtype=config["dtype"],
            trust_remote_code=True
        )

        load_time = time.time() - start_load
        log_loading(f"   ✅ Model loaded in {load_time:.1f}s")

        # Move to H200 GPU
        if HAS_CUDA:
            log_loading(f"   📱 Moving to H200 CUDA...")
            pipe = pipe.to("cuda")
            torch.cuda.synchronize()
            log_loading(f"   ✅ Model on H200 GPU")

        # H200 optimizations
        if hasattr(pipe, 'enable_vae_slicing'):
            pipe.enable_vae_slicing()
            log_loading(f"   ⚡ VAE slicing enabled")

        if hasattr(pipe, 'enable_vae_tiling'):
            pipe.enable_vae_tiling()
            log_loading(f"   ⚡ VAE tiling enabled")

        if hasattr(pipe, 'enable_memory_efficient_attention'):
            pipe.enable_memory_efficient_attention()
            log_loading(f"   ⚡ Memory efficient attention enabled")

        # Memory check after setup
        total_mem, allocated_mem = get_h200_memory()
        log_loading(f"   💾 Final memory: {allocated_mem:.1f}GB / {total_mem:.1f}GB")

        MODEL = pipe
        MODEL_INFO = config

        log_loading(f"🎯 SUCCESS: {model_name} ready for generation!")
        log_loading(f"📊 Capabilities: {config['max_frames']} frames @ {config['fps']} fps = {config['max_frames']/config['fps']:.1f}s videos")

        return True

    except Exception as e:
        log_loading(f"❌ {model_name} failed: {str(e)}")
        # Thorough cleanup
        if HAS_CUDA:
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
@@ -193,77 +199,63 @@ def try_load_premium_model(config):

        return False
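The loader above probes each optimization with hasattr() so the same code works across pipeline classes. A minimal sketch of that pattern factored into a reusable helper (my factoring, not part of the commit; the method names are the ones try_load_working_model() already probes for):

def apply_memory_optimizations(pipe, log=print):
    # Call each optimization only if this pipeline class exposes it.
    for name in ("enable_vae_slicing",
                 "enable_vae_tiling",
                 "enable_memory_efficient_attention"):
        if hasattr(pipe, name):
            getattr(pipe, name)()
            log(f"⚡ {name} enabled")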
# Note: a conditional decorator expression like this is valid on Python 3.9+ (PEP 614);
# when Spaces isn't available, the identity lambda leaves the function undecorated.
@spaces.GPU(duration=300) if SPACES_AVAILABLE else lambda x: x
def generate_video(
    prompt: str,
    negative_prompt: str = "",
    num_frames: int = 49,
    num_inference_steps: int = 50,
    guidance_scale: float = 6.0,
    seed: int = -1
) -> Tuple[Optional[str], str]:
    """Generate video with working model"""

    global MODEL, MODEL_INFO

    # Load working model
    if not load_working_model():
        logs = "\n".join(LOADING_LOGS[-10:])
        return None, f"❌ No working models could be loaded\n\nDetailed Logs:\n{logs}"

    # Input validation
    if not prompt.strip():
        return None, "❌ Please enter a detailed prompt."

    if len(prompt) < 5:
        return None, "❌ Please provide a more descriptive prompt."

    # Get model specifications
    max_frames = MODEL_INFO["max_frames"]
    width, height = MODEL_INFO["resolution"]
    target_fps = MODEL_INFO["fps"]

    # Validate and adjust parameters
    num_frames = min(max(num_frames, 8), max_frames)

    # Model-specific optimizations
    if MODEL_INFO["name"].startswith("CogVideoX"):
        # CogVideoX optimal settings
        guidance_scale = max(6.0, min(guidance_scale, 7.0))
        num_inference_steps = max(50, num_inference_steps)

    try:
        # H200 memory preparation
        start_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0

        # Seed handling
        if seed == -1:
            seed = np.random.randint(0, 2**32 - 1)

        device = "cuda" if HAS_CUDA else "cpu"
        generator = torch.Generator(device=device).manual_seed(seed)

        log_loading(f"🎬 GENERATION START - {MODEL_INFO['name']}")
        log_loading(f"📝 Prompt: {prompt[:80]}...")
        log_loading(f"📐 Settings: {width}x{height}, {num_frames} frames, {num_inference_steps} steps")
        log_loading(f"🎯 Expected duration: {num_frames/target_fps:.1f} seconds @ {target_fps} fps")

        start_time = time.time()

        # Generate with proper autocast
        with torch.autocast(device, dtype=MODEL_INFO["dtype"], enabled=HAS_CUDA):

            # Prepare generation parameters
@@ -277,70 +269,69 @@ def generate_premium_video(

            # (unchanged lines 262-268, which open the gen_kwargs dict, are not shown by the diff)
                "generator": generator,
            }

            # Enhanced negative prompt for quality
            if negative_prompt.strip():
                gen_kwargs["negative_prompt"] = negative_prompt
            else:
                # Default quality negative prompt
                quality_negative = "blurry, low quality, distorted, pixelated, compression artifacts, static, boring, amateur, watermark, text"
                gen_kwargs["negative_prompt"] = quality_negative
                log_loading(f"🚫 Applied quality negative prompt")

            # CogVideoX specific parameters
            if MODEL_INFO["name"].startswith("CogVideoX"):
                gen_kwargs["num_videos_per_prompt"] = 1
                log_loading(f"🔥 CogVideoX generation starting...")

            # Generate
            log_loading(f"🚀 H200 generation in progress...")
            result = MODEL(**gen_kwargs)

        end_time = time.time()
        generation_time = end_time - start_time

        # Extract frames
        if hasattr(result, 'frames'):
            video_frames = result.frames[0]
            log_loading(f"📹 Extracted {len(video_frames)} frames")
        elif hasattr(result, 'videos'):
            video_frames = result.videos[0]
            log_loading(f"📹 Extracted video tensor")
        else:
            log_loading(f"❌ Unknown result format")
            return None, "❌ Could not extract video frames"

        # Export with correct FPS
        actual_duration = num_frames / target_fps

        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            from diffusers.utils import export_to_video
            export_to_video(video_frames, tmp_file.name, fps=target_fps)
            video_path = tmp_file.name
            log_loading(f"🎬 Exported: {actual_duration:.1f}s video @ {target_fps} fps")

        # Memory usage
        end_memory = torch.cuda.memory_allocated(0) / (1024**3) if HAS_CUDA else 0
        memory_used = end_memory - start_memory

        # Success report
        success_msg = f"""🎯 **H200 VIDEO GENERATED SUCCESSFULLY**

🤖 **Model:** {MODEL_INFO['name']}
📝 **Prompt:** {prompt}
🎬 **Video:** {num_frames} frames @ {target_fps} fps = **{actual_duration:.1f} seconds**
📐 **Resolution:** {width}x{height}
⚙️ **Quality:** {num_inference_steps} inference steps
🎯 **Guidance:** {guidance_scale}
🎲 **Seed:** {seed}
⏱️ **Generation Time:** {generation_time:.1f}s ({generation_time/60:.1f} min)
🖥️ **Device:** H200 MIG (69.5GB)
💾 **Memory Used:** {memory_used:.1f}GB
📊 **Model:** {MODEL_INFO['description']}

**🎥 Result:** {actual_duration:.1f} second high-quality video!"""

        log_loading(f"✅ SUCCESS: {actual_duration:.1f}s video generated in {generation_time:.1f}s")

        return video_path, success_msg
@@ -348,7 +339,7 @@ def generate_premium_video(
@@ -356,238 +347,227 @@ def generate_premium_video(

        if HAS_CUDA:
            torch.cuda.empty_cache()
            gc.collect()
        return None, "❌ H200 memory exceeded. Try reducing frames or steps."

    except Exception as e:
        if HAS_CUDA:
            torch.cuda.empty_cache()  # unchanged line elided between the two hunks; inferred from the handler above
            gc.collect()
        error_msg = str(e)
        log_loading(f"❌ Generation error: {error_msg}")
        return None, f"❌ Generation failed: {error_msg}"
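The export step inside generate_video() is easy to exercise on its own. A self-contained sketch (not in the commit; the frame format is an assumption, as recent diffusers versions accept PIL images or float arrays in [0, 1]):

import tempfile
import numpy as np
from diffusers.utils import export_to_video

# 16 black 480x720 frames @ 8 fps -> a 2.0 second .mp4, mirroring how
# generate_video() writes its output file.
frames = [np.zeros((480, 720, 3), dtype=np.float32) for _ in range(16)]
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
    export_to_video(frames, tmp.name, fps=8)
    print(f"wrote {tmp.name}: {len(frames) / 8:.1f}s")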
def get_model_status():
    """Get current model status"""
    if MODEL is None:
        return "⏳ **No model loaded** - will auto-load CogVideoX on first generation"

    name = MODEL_INFO['name']
    max_frames = MODEL_INFO['max_frames']
    fps = MODEL_INFO['fps']
    width, height = MODEL_INFO['resolution']
    max_duration = max_frames / fps

    return f"""🎯 **{name} READY**

**📊 Video Capabilities:**
- **Maximum Duration:** {max_duration:.1f} seconds ({max_frames} frames @ {fps} fps)
- **Resolution:** {width}x{height}
- **Quality Level:** {MODEL_INFO['description']}

**⚡ H200 Status:**
- Model fully loaded in GPU memory
- All optimizations enabled
- Ready for {max_duration:.1f} second video generation

**💡 This model creates {max_duration:.1f} second videos with {max_frames} frames!**"""
def get_loading_logs():
    """Get formatted loading logs"""
    global LOADING_LOGS
    if not LOADING_LOGS:
        return "No loading logs yet. Click generate to start loading."
    return "\n".join(LOADING_LOGS)
def suggest_optimal_settings():
    """Suggest optimal settings for loaded model"""
    if MODEL is None:
        return "No model loaded yet. Generate a video to auto-load CogVideoX."

    name = MODEL_INFO['name']
    max_frames = MODEL_INFO['max_frames']
    fps = MODEL_INFO['fps']
    max_duration = max_frames / fps

    return f"""## 🎯 Optimal Settings for {name}

**🏆 Maximum Quality (Recommended):**
- Frames: {max_frames} (full {max_duration:.1f} second video)
- Inference Steps: 50-70
- Guidance Scale: 6.0-6.5
- Expected Time: 3-5 minutes

**⚖️ Balanced Quality:**
- Frames: {max_frames//2} ({max_frames//2/fps:.1f} second video)
- Inference Steps: 40-50
- Guidance Scale: 6.0
- Expected Time: 2-3 minutes

**⚡ Quick Test:**
- Frames: 25 ({25/fps:.1f} second video)
- Inference Steps: 30-40
- Guidance Scale: 6.0
- Expected Time: 1-2 minutes

**📝 {name} Prompt Tips:**
- Be very specific and detailed
- Describe camera movements: "slow zoom in", "tracking shot", "aerial view"
- Include lighting: "golden hour", "soft lighting", "dramatic shadows"
- Add motion description: "smooth movement", "graceful motion", "flowing"
- Specify style: "cinematic", "professional", "documentary style"

**🌟 Example Premium Prompt:**
"A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot following the bird's smooth flight path, professional wildlife documentary style with warm sunset lighting, breathtaking landscape vista below"

Remember: {name} excels at smooth, natural motion and cinematic quality!"""
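A quick check (illustrative only) of the frames-to-seconds arithmetic these presets and the "6+ second" headline rely on:

def clip_seconds(frames: int, fps: int = 8) -> float:
    # Duration is simply frame count over playback rate.
    return frames / fps

assert clip_seconds(49) == 6.125   # maximum-quality preset: the "6+ second" figure
assert clip_seconds(25) == 3.125   # quick-test preset
assert clip_seconds(16) == 2.0     # ModelScope fallback ceiling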
# Create working interface
with gr.Blocks(title="H200 CogVideoX Generator", theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
    # 🎯 H200 CogVideoX Video Generator

    **CogVideoX-2B/5B Priority** • **6+ Second Videos** • **H200 MIG Optimized**
    """)

    # Status indicator
    with gr.Row():
        gr.Markdown("""
        <div style="background: linear-gradient(45deg, #4ECDC4, #44A08D); padding: 12px; border-radius: 12px; text-align: center; color: white; font-weight: bold;">
        🚀 H200 MIG 69.5GB - COGVIDEOX READY - 6+ SECOND VIDEOS 🚀
        </div>
        """)

    with gr.Tab("🎬 Generate Video"):
        with gr.Row():
            with gr.Column(scale=1):
                prompt_input = gr.Textbox(
                    label="📝 Detailed Video Prompt",
                    placeholder="A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot following the bird's smooth flight path, professional wildlife documentary style with warm sunset lighting, breathtaking landscape vista below...",
                    lines=4
                )

                negative_prompt_input = gr.Textbox(
                    label="🚫 Negative Prompt (Optional)",
                    placeholder="blurry, low quality, distorted, pixelated, static, boring, amateur...",
                    lines=2
                )

                with gr.Accordion("⚙️ Generation Settings", open=True):
                    with gr.Row():
                        num_frames = gr.Slider(
                            minimum=8,
                            maximum=49,
                            value=49,
                            step=1,
                            label="🎬 Frames (49 = 6+ seconds)"
                        )

                        num_steps = gr.Slider(
                            minimum=30,
                            maximum=70,
                            value=50,
                            step=5,
                            label="⚙️ Inference Steps"
                        )

                    with gr.Row():
                        guidance_scale = gr.Slider(
                            minimum=4.0,
                            maximum=8.0,
                            value=6.0,
                            step=0.5,
                            label="🎯 Guidance Scale"
                        )

                        seed = gr.Number(
                            label="🎲 Seed (-1 for random)",
                            value=-1,
                            precision=0
                        )

                generate_btn = gr.Button(
                    "🎯 Generate 6+ Second Video",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("""
                **⏱️ Generation Time:** 2-5 minutes
                **🎥 Output:** 6+ second high-quality videos
                **🤖 Model:** CogVideoX auto-loads first time
                """)

            with gr.Column(scale=1):
                video_output = gr.Video(
                    label="🎥 H200 Generated Video",
                    height=400
                )

                result_text = gr.Textbox(
                    label="📊 Generation Report",
                    lines=10,
                    show_copy_button=True
                )

        # Generate button
        generate_btn.click(
            fn=generate_video,
            inputs=[
                prompt_input, negative_prompt_input, num_frames,
                num_steps, guidance_scale, seed
            ],
            outputs=[video_output, result_text]
        )

        # Working examples
        gr.Examples(
            examples=[
                [
                    "A majestic eagle soaring gracefully through mountain valleys during golden hour, cinematic aerial tracking shot, professional wildlife documentary style",
                    "blurry, low quality, static, amateur",
                    49, 50, 6.0, 42
                ],
                [
                    "Ocean waves crashing against rocky coastline during sunset, slow motion cinematography with dramatic lighting and foam spray",
                    "calm, peaceful, low quality, boring",
                    41, 50, 6.5, 123
                ],
                [
                    "A serene mountain lake reflecting autumn trees, gentle camera pan across the water surface, peaceful nature documentary style",
                    "urban, modern, low quality, distorted",
                    33, 45, 6.0, 456
                ],
                [
                    "Steam rising from a hot coffee cup on wooden table by window during rain, cozy atmosphere with warm lighting, intimate close-up shot",
                    "cold, harsh, artificial, low quality",
                    25, 40, 6.0, 789
                ]
            ],
            inputs=[prompt_input, negative_prompt_input, num_frames, num_steps, guidance_scale, seed]
        )

    with gr.Tab("📊 Model Status"):
        with gr.Row():
            status_btn = gr.Button("📊 Check Model Status")
            logs_btn = gr.Button("📋 View Loading Logs")
            settings_btn = gr.Button("⚙️ Optimal Settings")

        status_output = gr.Markdown()
        logs_output = gr.Textbox(label="Loading Logs", lines=15, show_copy_button=True)
        settings_output = gr.Markdown()

        status_btn.click(fn=get_model_status, outputs=status_output)
        logs_btn.click(fn=get_loading_logs, outputs=logs_output)
        settings_btn.click(fn=suggest_optimal_settings, outputs=settings_output)

    # Auto-load status
    demo.load(fn=get_model_status, outputs=status_output)

if __name__ == "__main__":
    demo.queue(max_size=3)
    demo.launch(
        share=False,
        server_name="0.0.0.0",