import os
import random
from glob import glob
from typing import Optional

import torch
from PIL import Image
from diffusers import StableVideoDiffusionPipeline

from .tdd_svd_scheduler import TDDSVDStochasticIterativeScheduler
from .utils import load_lora_weights, save_video

# Define paths and device
svd_path = 'stabilityai/stable-video-diffusion-img2vid-xt-1-1'
lora_repo_path = 'RED-AIGC/TDD'
lora_weight_name = 'svd-xt-1-1_tdd_lora_weights.safetensors'
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize the noise scheduler and pipeline
noise_scheduler = TDDSVDStochasticIterativeScheduler(
    num_train_timesteps=250, sigma_min=0.002, sigma_max=700.0, 
    sigma_data=1.0, s_noise=1.0, rho=7, clip_denoised=False
)
pipeline = StableVideoDiffusionPipeline.from_pretrained(
    svd_path, scheduler=noise_scheduler, torch_dtype=torch.float32
).to(device)
load_lora_weights(pipeline.unet, lora_repo_path, weight_name=lora_weight_name)
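
# Optional (a sketch, not part of the original script): on CUDA, loading the
# pipeline with torch_dtype=torch.float16 roughly halves memory use, at some
# cost in numerical fidelity. Kept commented out because the script above
# deliberately runs in float32.
#
# if device == "cuda":
#     pipeline = StableVideoDiffusionPipeline.from_pretrained(
#         svd_path, scheduler=noise_scheduler, torch_dtype=torch.float16
#     ).to(device)
#     load_lora_weights(pipeline.unet, lora_repo_path, weight_name=lora_weight_name)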

# Video function definition
def Video(
    image: Image.Image,
    seed: Optional[int] = 1,
    randomize_seed: bool = False,
    num_inference_steps: int = 4,
    eta: float = 0.3,
    min_guidance_scale: float = 1.0,
    max_guidance_scale: float = 1.0,
    fps: int = 7,
    width: int = 512,
    height: int = 512,
    num_frames: int = 25,
    motion_bucket_id: int = 127,
    output_folder: str = "outputs_gradio",
):
    # Set the eta value in the scheduler
    pipeline.scheduler.set_eta(eta)

    # Handle seed randomness
    if randomize_seed:
        seed = random.randint(0, 2**64 - 1)
    # torch.manual_seed seeds the global RNG and returns the default generator
    generator = torch.manual_seed(seed)
    
    # Gradio passes images as numpy arrays; convert to PIL only if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    # Run inference in float32. A mixed-precision path (float16 autocast on
    # CUDA, bfloat16 autocast on CPU) was previously sketched here; it can be
    # re-enabled for speed at some cost in numerical accuracy.
    frames = pipeline(
        image, height=height, width=width,
        num_inference_steps=num_inference_steps,
        min_guidance_scale=min_guidance_scale,
        max_guidance_scale=max_guidance_scale,
        num_frames=num_frames, fps=fps, motion_bucket_id=motion_bucket_id,
        generator=generator,
    ).frames[0]

    # Save the generated video, then re-seed the global RNG so that repeated
    # calls with the same seed stay reproducible
    save_video(frames, video_path, fps=fps, quality=5.0)
    torch.manual_seed(seed)

    return video_path, seed
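
# Example usage (a minimal sketch; "input.png" is a hypothetical image path).
# Because this module uses relative imports, run it as part of its package,
# e.g. `python -m <package>.<module>`, rather than directly as a script.
if __name__ == "__main__":
    img = Image.open("input.png").convert("RGB")
    video_file, used_seed = Video(img, seed=42)
    print(f"Saved video to {video_file} (seed={used_seed})")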