rahul7star committed
Commit b58eb59 (verified)
Parent(s): 770f802

Update DF.py

Files changed (1)
  1. DF.py +30 -38
DF.py CHANGED
@@ -1,56 +1,54 @@
 import os
 import time
+import uuid
 import torch
 import gradio as gr
 from diffusers import WanPipeline, AutoencoderKLWan
 from diffusers.utils import export_to_video
 from dfloat11 import DFloat11Model
 import spaces
-import uuid

-# Set environment variables
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"

-# Ensure this runs on CPU or ZeroGPU
 @spaces.GPU(enable_queue=True)
 def generate_video(prompt, negative_prompt, width, height, num_frames,
-                   guidance_scale, guidance_scale_2, num_inference_steps, fps):
-    torch.cuda.empty_cache()
+                   guidance_scale, guidance_scale_2, num_inference_steps, fps, cpu_offload):
+
     start_time = time.time()
+    torch.cuda.empty_cache()

     # Load model
     vae = AutoencoderKLWan.from_pretrained(
         "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
         subfolder="vae",
-        torch_dtype=torch.float32
+        torch_dtype=torch.float32,
     )

     pipe = WanPipeline.from_pretrained(
         "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
         vae=vae,
-        torch_dtype=torch.bfloat16
+        torch_dtype=torch.bfloat16,
     )

-    # Load DFloat11 optimization layers
+    # Load DFloat11 optimizations
     DFloat11Model.from_pretrained(
-        "DFloat11/Wan2.2-T2V-A14B-DF11",
+        "LeanModels/Wan2.2-T2V-A14B-DF11",
         device="cpu",
-        cpu_offload=True,
+        cpu_offload=cpu_offload,
         bfloat16_model=pipe.transformer,
     )
     DFloat11Model.from_pretrained(
-        "DFloat11/Wan2.2-T2V-A14B-2-DF11",
+        "LeanModels/Wan2.2-T2V-A14B-2-DF11",
         device="cpu",
-        cpu_offload=True,
+        cpu_offload=cpu_offload,
         bfloat16_model=pipe.transformer_2,
     )

     pipe.enable_model_cpu_offload()

-    # Run inference
-    result = pipe(
+    # Generate video frames
+    output_frames = pipe(
         prompt=prompt,
         negative_prompt=negative_prompt,
         height=height,
@@ -61,51 +59,45 @@ def generate_video(prompt, negative_prompt, width, height, num_frames,
         num_inference_steps=num_inference_steps,
     ).frames[0]

-    output_path = f"/tmp/video_{uuid.uuid4().hex}.mp4"
-    export_to_video(result, output_path, fps=fps)
+    # Export to video
+    output_path = f"/tmp/{uuid.uuid4().hex}_t2v.mp4"
+    export_to_video(output_frames, output_path, fps=fps)

     elapsed = time.time() - start_time
-    print(f"Video generated in {elapsed:.2f} seconds")
-
+    print(f" Generated in {elapsed:.2f}s, saved to {output_path}")
     return output_path


 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("## 🎥 Wan2.2 Text-to-Video Generator (ZeroGPU Ready)")
+    gr.Markdown("## 🎬 Wan2.2 + DFloat11 - Text to Video Generator")

     with gr.Row():
-        prompt = gr.Textbox(
-            label="Prompt",
-            value="A serene koi pond at night, with glowing lanterns reflecting on the rippling water. Ethereal fireflies dance above as cherry blossoms gently fall.",
-            lines=3
-        )
-        negative_prompt = gr.Textbox(
-            label="Negative Prompt",
-            value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走",
-            lines=3
-        )
+        prompt = gr.Textbox(label="Prompt", value="A serene koi pond at night, with glowing lanterns reflecting on the rippling water. Ethereal fireflies dance above as cherry blossoms gently fall.", lines=3)
+        negative_prompt = gr.Textbox(label="Negative Prompt", value="色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走", lines=3)

     with gr.Row():
-        width = gr.Slider(256, 1280, value=768, step=64, label="Width")
-        height = gr.Slider(256, 720, value=432, step=64, label="Height")
-        num_frames = gr.Slider(8, 81, value=40, step=1, label="Number of Frames")
+        width = gr.Slider(256, 1280, value=1280, step=64, label="Width")
+        height = gr.Slider(256, 720, value=720, step=64, label="Height")
         fps = gr.Slider(8, 30, value=16, step=1, label="FPS")

     with gr.Row():
-        guidance_scale = gr.Slider(1.0, 10.0, value=4.0, step=0.1, label="Guidance Scale")
-        guidance_scale_2 = gr.Slider(1.0, 10.0, value=3.0, step=0.1, label="Guidance Scale 2")
+        num_frames = gr.Slider(8, 81, value=81, step=1, label="Frames")
         num_inference_steps = gr.Slider(10, 60, value=40, step=1, label="Inference Steps")

     with gr.Row():
-        btn = gr.Button("🎬 Generate Video")
+        guidance_scale = gr.Slider(1.0, 10.0, value=4.0, step=0.1, label="Guidance Scale (Stage 1)")
+        guidance_scale_2 = gr.Slider(1.0, 10.0, value=3.0, step=0.1, label="Guidance Scale (Stage 2)")
+        cpu_offload = gr.Checkbox(label="Enable CPU Offload", value=True)
+
+    with gr.Row():
+        btn = gr.Button("🚀 Generate Video")
         output_video = gr.Video(label="Generated Video")

     btn.click(
         generate_video,
-        inputs=[prompt, negative_prompt, width, height, num_frames, guidance_scale, guidance_scale_2, num_inference_steps, fps],
+        inputs=[prompt, negative_prompt, width, height, num_frames, guidance_scale, guidance_scale_2, num_inference_steps, fps, cpu_offload],
         outputs=[output_video]
     )

-# Launch Gradio app
 demo.launch()
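
For reference, a minimal sketch of what a direct (non-UI) call to the updated generate_video could look like, e.g. dropped into the script in place of demo.launch(). This is an illustration, not part of the commit: it assumes a CUDA-capable device (or a ZeroGPU allocation) is available, and the values simply mirror the new numeric UI defaults, including the cpu_offload flag introduced in this change.

    # Hypothetical direct call; values mirror the new Gradio defaults.
    video_path = generate_video(
        prompt="A serene koi pond at night, with glowing lanterns reflecting on the rippling water.",
        negative_prompt="",
        width=1280,
        height=720,
        num_frames=81,
        guidance_scale=4.0,
        guidance_scale_2=3.0,
        num_inference_steps=40,
        fps=16,
        cpu_offload=True,  # new argument in this commit; forwarded to both DFloat11Model.from_pretrained() calls
    )
    print(video_path)  # e.g. /tmp/<hex>_t2v.mp4

At these defaults (81 frames at 16 fps), the exported clip is roughly five seconds long.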