"""Gradio demo: render a single still image with the Wan 2.1 T2V pipeline.

The script loads the Wan text-to-video pipeline once at import time, asks it
for a one-frame "video", and serves the resulting frame through a minimal
Gradio interface.
"""

import gradio as gr
import numpy as np
import spaces  # provides the `spaces.GPU` decorator used on `generate`
import torch
from diffusers import AutoencoderKLWan  # Use Wan-specific VAE
from diffusers import UniPCMultistepScheduler
from diffusers import WanPipeline
from diffusers.models import UNetSpatioTemporalConditionModel  # noqa: F401
from PIL import Image
from transformers import T5EncoderModel, T5Tokenizer  # noqa: F401

model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"

# The VAE is loaded separately in float32 while the rest of the pipeline is
# loaded in bfloat16.
vae = AutoencoderKLWan.from_pretrained(
    model_id, subfolder="vae", torch_dtype=torch.float32
)
pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)

flow_shift = 5.0  # 5.0 for 720P, 3.0 for 480P
pipe.scheduler = UniPCMultistepScheduler.from_config(
    pipe.scheduler.config, flow_shift=flow_shift
)


@spaces.GPU()
def generate(prompt: str) -> Image.Image:
    """Generate one 1280x720 image for *prompt*.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The first frame of the first generated sample as a PIL image.
    """
    pipe.to("cuda")
    output = pipe(
        prompt=prompt,
        height=720,
        width=1280,
        num_frames=1,  # a single frame: the video pipeline is used as an image generator
        num_inference_steps=28,
        guidance_scale=5.0,
    )
    # First sample, first frame.
    # NOTE(review): assumes the pipeline returns float frames in [0, 1] as a
    # NumPy array (H, W, C) per frame -- confirm against the pipeline's
    # output_type default.
    frame = np.asarray(output.frames[0][0])
    # Clip and round before the cast: a bare astype(uint8) truncates and wraps
    # around for any value that falls slightly outside [0, 1].
    pixels = np.clip(frame * 255.0, 0.0, 255.0).round().astype(np.uint8)
    return Image.fromarray(pixels)


iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Input prompt"),
    ],
    outputs=gr.Image(label="output"),
)

iface.launch()