rahul7star committed · verified
Commit 7f14f6f · 1 Parent(s): 4df0ad4

Update app_t2v.py

Files changed (1):
  1. app_t2v.py (+44, -16)
app_t2v.py CHANGED
@@ -14,25 +14,53 @@ from diffusers.utils import export_to_video
 MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
-DEFAULT_NEGATIVE_PROMPT = "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
+DEFAULT_NEGATIVE_PROMPT = (
+    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
+    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,"
+    "画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
+)
 
 # Setup
-dtype = torch.float16 # switched to float16 for stability
+dtype = torch.float16 # using float16 for broader compatibility
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Load model
-vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=dtype)
-pipe.to(device)
-
-# Prime the pipeline (warm-up to reduce first-run latency)
-_ = pipe(prompt="warmup", negative_prompt=DEFAULT_NEGATIVE_PROMPT, height=512, width=768, num_frames=8, num_inference_steps=2).frames[0]
-
-# GPU duration estimator
-
-@spaces.GPU(duration=200)
-def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed):
+# Load model components on correct device
+vae = AutoencoderKLWan.from_pretrained(
+    MODEL_ID, subfolder="vae", torch_dtype=torch.float32
+).to(device)
+
+pipe = WanPipeline.from_pretrained(
+    MODEL_ID, vae=vae, torch_dtype=dtype
+).to(device)
+
+# Warm-up call to reduce cold-start latency
+_ = pipe(
+    prompt="warmup",
+    negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+    height=512,
+    width=768,
+    num_frames=8,
+    num_inference_steps=2,
+    generator=torch.Generator(device=device).manual_seed(0),
+).frames[0]
+
+# Estimate duration for Hugging Face Spaces GPU usage
+def get_duration(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed):
+    return int(num_steps * 15)
+
+@spaces.GPU(duration=get_duration)
+def generate_video(
+    prompt,
+    negative_prompt,
+    height,
+    width,
+    num_frames,
+    guidance_scale,
+    guidance_scale_2,
+    num_steps,
+    seed,
+    randomize_seed
+):
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     generator = torch.Generator(device=device).manual_seed(current_seed)
 
@@ -54,7 +82,7 @@ def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_
 
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎬 Wan2.2 Text-to-Video Generator with HF Spaces GPU")
+    gr.Markdown("## 🎬 Wan2.2 Text-to-Video Generator with Hugging Face Spaces GPU")
 
     with gr.Row():
         with gr.Column():
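Note on the main change: the fixed @spaces.GPU(duration=200) reservation is replaced by ZeroGPU's dynamic-duration form, in which duration is a callable that receives the same arguments as the decorated function and returns the reservation in seconds. A minimal sketch of the pattern, assuming the spaces package on a ZeroGPU-enabled Space; the 15-seconds-per-step constant is this app's own rough estimate, not a library value:

import spaces

def get_duration(prompt, num_steps):
    # ZeroGPU calls this with the same arguments as the decorated
    # function; the return value is the GPU reservation in seconds.
    return int(num_steps * 15)

@spaces.GPU(duration=get_duration)
def generate(prompt, num_steps):
    ...  # GPU-bound work runs inside the reserved window

Sizing the reservation to num_steps means short preview runs no longer hold the GPU for the full fixed 200-second window.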
 
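For reference, the loading scheme this commit settles on (float32 VAE inside an otherwise float16 WanPipeline, plus an explicit seeded generator) can be exercised outside Gradio. A hedged usage sketch that reuses the warm-up settings from the diff; the prompt and output path are illustrative, not from the app:

import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video

MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The VAE stays in float32 while the rest of the pipeline runs in
# float16, matching the dtypes used in the commit.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.float16).to(device)

frames = pipe(
    prompt="a red fox running through fresh snow",  # illustrative prompt
    height=512,
    width=768,
    num_frames=8,
    num_inference_steps=20,
    generator=torch.Generator(device=device).manual_seed(42),  # reproducible output
).frames[0]
export_to_video(frames, "output.mp4", fps=16)  # 16 matches FIXED_FPS in the app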