import os
import random
from glob import glob
from typing import Optional

import torch
from PIL import Image
from diffusers import StableVideoDiffusionPipeline

from .tdd_svd_scheduler import TDDSVDStochasticIterativeScheduler
from .utils import load_lora_weights, save_video
# Define paths and device
svd_path = 'stabilityai/stable-video-diffusion-img2vid-xt-1-1'
lora_repo_path = 'RED-AIGC/TDD'
lora_weight_name = 'svd-xt-1-1_tdd_lora_weights.safetensors'
device = "cuda" if torch.cuda.is_available() else "cpu"
# Initialize the noise scheduler and pipeline
noise_scheduler = TDDSVDStochasticIterativeScheduler(
    num_train_timesteps=250, sigma_min=0.002, sigma_max=700.0,
    sigma_data=1.0, s_noise=1.0, rho=7, clip_denoised=False,
)
pipeline = StableVideoDiffusionPipeline.from_pretrained(
    svd_path, scheduler=noise_scheduler, torch_dtype=torch.float32
).to(device)
load_lora_weights(pipeline.unet, lora_repo_path, weight_name=lora_weight_name)
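# NOTE: float32 keeps the demo runnable on CPU. On a CUDA device, loading the
# pipeline with torch_dtype=torch.float16 is the usual memory-saving variant
# for SVD; whether that fits your hardware is an assumption, not a requirement
# of this script.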
# Video generation function
def Video(
    image: Image.Image,
    seed: Optional[int] = 1,
    randomize_seed: bool = False,
    num_inference_steps: int = 4,
    eta: float = 0.3,
    min_guidance_scale: float = 1.0,
    max_guidance_scale: float = 1.0,
    fps: int = 7,
    width: int = 512,
    height: int = 512,
    num_frames: int = 25,
    motion_bucket_id: int = 127,
    output_folder: str = "outputs_gradio",
):
    # Set the stochastic eta value in the TDD scheduler
    pipeline.scheduler.set_eta(eta)
    # Handle seed randomization
    if randomize_seed:
        seed = random.randint(0, 2**64 - 1)
    generator = torch.manual_seed(seed)
    # Accept either a NumPy array (as passed by Gradio) or a PIL image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    # The pipeline runs in float32, matching the dtype it was loaded with.
    # A mixed-precision variant would wrap this call in torch.autocast
    # (float16 on CUDA, bfloat16 on CPU).
    frames = pipeline(
        image,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        min_guidance_scale=min_guidance_scale,
        max_guidance_scale=max_guidance_scale,
        num_frames=num_frames,
        fps=fps,
        motion_bucket_id=motion_bucket_id,
        generator=generator,
    ).frames[0]
    # Save the generated video
    save_video(frames, video_path, fps=fps, quality=5.0)
    # Re-seed the global RNG with the seed actually used
    torch.manual_seed(seed)
    return video_path, seed
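

# Minimal usage sketch. Because this file uses relative imports, run it as a
# module (e.g. `python -m <package>.app`); "input.png" is a hypothetical
# placeholder path, and any RGB image works as the conditioning frame.
if __name__ == "__main__":
    demo_image = Image.open("input.png").convert("RGB").resize((512, 512))
    path, used_seed = Video(demo_image, seed=42, randomize_seed=False)
    print(f"Saved video to {path} (seed={used_seed})")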