rahul7star committed · verified
Commit 7f14f6f · 1 Parent(s): 4df0ad4

Update app_t2v.py

Files changed (1):
  1. app_t2v.py (+44, -16)
app_t2v.py CHANGED
@@ -14,25 +14,53 @@ from diffusers.utils import export_to_video
 MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 16
-DEFAULT_NEGATIVE_PROMPT = "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
+DEFAULT_NEGATIVE_PROMPT = (
+    "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,"
+    "最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,"
+    "画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"
+)
 
 # Setup
-dtype = torch.float16 # switched to float16 for stability
+dtype = torch.float16 # using float16 for broader compatibility
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
-# Load model
-vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
-pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=dtype)
-pipe.to(device)
-
-# Prime the pipeline (warm-up to reduce first-run latency)
-_ = pipe(prompt="warmup", negative_prompt=DEFAULT_NEGATIVE_PROMPT, height=512, width=768, num_frames=8, num_inference_steps=2).frames[0]
-
-# GPU duration estimator
-
-@spaces.GPU(duration=200)
-def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed):
+# Load model components on correct device
+vae = AutoencoderKLWan.from_pretrained(
+    MODEL_ID, subfolder="vae", torch_dtype=torch.float32
+).to(device)
+
+pipe = WanPipeline.from_pretrained(
+    MODEL_ID, vae=vae, torch_dtype=dtype
+).to(device)
+
+# Warm-up call to reduce cold-start latency
+_ = pipe(
+    prompt="warmup",
+    negative_prompt=DEFAULT_NEGATIVE_PROMPT,
+    height=512,
+    width=768,
+    num_frames=8,
+    num_inference_steps=2,
+    generator=torch.Generator(device=device).manual_seed(0),
+).frames[0]
+
+# Estimate duration for Hugging Face Spaces GPU usage
+def get_duration(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps, seed, randomize_seed):
+    return int(num_steps * 15)
+
+@spaces.GPU(duration=get_duration)
+def generate_video(
+    prompt,
+    negative_prompt,
+    height,
+    width,
+    num_frames,
+    guidance_scale,
+    guidance_scale_2,
+    num_steps,
+    seed,
+    randomize_seed
+):
     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
     generator = torch.Generator(device=device).manual_seed(current_seed)
 
@@ -54,7 +82,7 @@ def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_
 
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎬 Wan2.2 Text-to-Video Generator with HF Spaces GPU")
+    gr.Markdown("## 🎬 Wan2.2 Text-to-Video Generator with Hugging Face Spaces GPU")
 
     with gr.Row():
         with gr.Column():
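Note on the main change: the fixed @spaces.GPU(duration=200) reservation is replaced by ZeroGPU's dynamic-duration form, in which duration is a callable that receives the same arguments as the decorated function and returns the reservation in seconds. A minimal sketch of the pattern, assuming the spaces package on a ZeroGPU-enabled Space; the 15-seconds-per-step constant is this app's own rough estimate, not a library value:

import spaces

def get_duration(prompt, num_steps):
    # ZeroGPU calls this with the same arguments as the decorated
    # function; the return value is the GPU reservation in seconds.
    return int(num_steps * 15)

@spaces.GPU(duration=get_duration)
def generate(prompt, num_steps):
    ...  # GPU-bound work runs inside the reserved window

Sizing the reservation to num_steps means short preview runs no longer hold the GPU for the full fixed 200-second window.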
 
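For reference, the loading scheme this commit settles on (float32 VAE inside an otherwise float16 WanPipeline, plus an explicit seeded generator) can be exercised outside Gradio. A hedged usage sketch that reuses the warm-up settings from the diff; the prompt and output path are illustrative, not from the app:

import torch
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.utils import export_to_video

MODEL_ID = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The VAE stays in float32 while the rest of the pipeline runs in
# float16, matching the dtypes used in the commit.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.float16).to(device)

frames = pipe(
    prompt="a red fox running through fresh snow",  # illustrative prompt
    height=512,
    width=768,
    num_frames=8,
    num_inference_steps=20,
    generator=torch.Generator(device=device).manual_seed(42),  # reproducible output
).frames[0]
export_to_video(frames, "output.mp4", fps=16)  # 16 matches FIXED_FPS in the app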