FastWan2.2_5B_TI2V

Running on Zero

File size: 2,499 Bytes

37b2b3a
44ef737
 
 
37b2b3a
44ef737
37b2b3a
44ef737
 
 
37b2b3a
44ef737
 
 
37b2b3a
44ef737
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37b2b3a
44ef737
 
 
37b2b3a
44ef737
 
 
37b2b3a
44ef737
 
 
37b2b3a
 
44ef737
 
37b2b3a
44ef737
 
37b2b3a
44ef737
 
 
 
37b2b3a
 
44ef737

import gradio as gr
import torch
from diffusers import WanPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video
import tempfile
import os

# Setup
# The pipeline runs in bfloat16; fall back to the CPU when no CUDA device is visible.
dtype = torch.bfloat16
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Load model and VAE once
# Both components come from the same Hub repository, so name it a single time.
_MODEL_REPO = "Wan-AI/Wan2.2-T2V-A14B-Diffusers"

# The VAE is loaded in float32 while the rest of the pipeline uses `dtype`.
vae = AutoencoderKLWan.from_pretrained(
    _MODEL_REPO,
    subfolder="vae",
    torch_dtype=torch.float32,
)
pipe = WanPipeline.from_pretrained(
    _MODEL_REPO,
    vae=vae,
    torch_dtype=dtype,
)
pipe.to(device)

# Core inference function
def generate_video(prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps):
    """Run the module-level pipeline on a prompt and write the result to an MP4.

    Args:
        prompt: Text describing the video to generate.
        negative_prompt: Text describing what the video should avoid.
        height: Frame height in pixels.
        width: Frame width in pixels.
        num_frames: Number of frames to generate.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        guidance_scale_2: Forwarded to the pipeline as ``guidance_scale_2``.
        num_steps: Forwarded to the pipeline as ``num_inference_steps``.

    Returns:
        Path to ``output.mp4`` inside a freshly created temporary directory.
    """
    # UI sliders and API clients may deliver whole numbers as floats; sizes
    # and counts are coerced so the pipeline always receives integers.
    height = int(height)
    width = int(width)
    num_frames = int(num_frames)
    num_steps = int(num_steps)

    # Autocast on the device that was actually selected at start-up instead of
    # assuming CUDA, so the context matches where the pipeline lives.
    with torch.autocast(device, dtype=dtype):
        output = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
            num_frames=num_frames,
            guidance_scale=guidance_scale,
            guidance_scale_2=guidance_scale_2,
            num_inference_steps=num_steps,
        ).frames[0]  # first entry of the returned frames

    # The file has to outlive this call because its path is handed back to the
    # caller, so the temporary directory is deliberately not removed here.
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "output.mp4")
    export_to_video(output, video_path, fps=16)
    return video_path

# Gradio UI
# Layout: prompt row, resolution row, frames/steps row, guidance row, then the
# trigger button and the video player that shows the result.
with gr.Blocks() as demo:
    gr.Markdown("## 🐾 Wan2.2 T2V Demo – Gradio Edition")

    with gr.Row():
        prompt = gr.Textbox(label="Prompt", value="Two anthropomorphic cats in comfy boxing gear fight intensely.")
        negative_prompt = gr.Textbox(label="Negative Prompt", value="色调艳丽，过曝，静态，细节模糊不清，字幕，最差质量，丑陋的，多余的手指，畸形")

    with gr.Row():
        # WanPipeline rejects heights/widths that are not divisible by 16.
        # Slider positions are minimum + k * step, so the minimum must itself
        # be a multiple of 16 (368 = 16 * 23); with the previous minimum of 360
        # every position the user could drag to was 8 off the required grid.
        height = gr.Slider(368, 1024, value=720, step=16, label="Height")
        width = gr.Slider(368, 1920, value=1280, step=16, label="Width")

    with gr.Row():
        num_frames = gr.Slider(16, 100, value=81, step=1, label="Number of Frames")
        num_steps = gr.Slider(10, 60, value=40, step=1, label="Inference Steps")

    with gr.Row():
        guidance_scale = gr.Slider(1.0, 10.0, value=4.0, step=0.5, label="Guidance Scale")
        guidance_scale_2 = gr.Slider(1.0, 10.0, value=3.0, step=0.5, label="Guidance Scale 2")

    generate_btn = gr.Button("Generate Video")
    video_output = gr.Video(label="Generated Video")

    # The input order must match the positional parameters of generate_video.
    generate_btn.click(
        fn=generate_video,
        inputs=[prompt, negative_prompt, height, width, num_frames, guidance_scale, guidance_scale_2, num_steps],
        outputs=video_output,
    )

demo.launch()