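"""Gradio demo: turn a single image into a longer video by chaining five
Stable Video Diffusion (img2vid-xt) snippets, each seeded with the previous
snippet's final frame, then concatenating them with moviepy."""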
import gradio as gr
import torch
import os
from glob import glob
from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image
import random
from moviepy import VideoFileClip, concatenate_videoclips
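# NOTE: top-level moviepy imports assume moviepy >= 2.0; on the 1.x series the
# same classes live under `moviepy.editor`.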
# Load the Stable Video Diffusion Pipeline
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
)
pipe.to("cuda")
# Largest signed 64-bit integer, used as the upper bound for random seeds
max_64_bit_int = 2**63 - 1
# Resize and center-crop an image to the target resolution, preserving aspect ratio
def resize_image(image, output_size=(1024, 576)):
    target_aspect = output_size[0] / output_size[1]
    image_aspect = image.width / image.height
    if image_aspect > target_aspect:
        # Wider than target: match the height, then crop the width
        new_height = output_size[1]
        new_width = int(new_height * image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        left = (new_width - output_size[0]) / 2
        right = (new_width + output_size[0]) / 2
        top, bottom = 0, output_size[1]
    else:
        # Taller than target: match the width, then crop the height
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        left, right = 0, output_size[0]
        top = (new_height - output_size[1]) / 2
        bottom = (new_height + output_size[1]) / 2
    # PIL rounds fractional crop coordinates internally
    return resized_image.crop((left, top, right, bottom))
# Combine multiple video snippets into a single video
def combine_videos(video_paths, output_path="outputs/final_long_video.mp4"):
    # Create the destination directory from output_path rather than a hardcoded name
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    clips = [VideoFileClip(vp) for vp in video_paths]
    final_clip = concatenate_videoclips(clips, method="compose")
    final_clip.write_videofile(output_path, codec="libx264", fps=clips[0].fps, audio=False)
    # Release file handles held by the source clips
    for clip in clips:
        clip.close()
    return output_path
# Generate a video snippet from an input image
def generate_snippet(
    init_image: Image.Image, seed: int, motion_bucket_id: int, fps_id: int, decoding_t: int, output_folder: str
):
    generator = torch.manual_seed(seed)
    os.makedirs(output_folder, exist_ok=True)
    # Number output files sequentially based on how many snippets already exist
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    result = pipe(
        init_image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=25,
    )
    frames = result.frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    # Return the last frame so the caller can chain it into the next snippet
    return frames[-1], video_path
# Generate a long video composed of 5 short snippets
def sample_long(
    image: Image.Image,
    seed: Optional[int] = 42,
    randomize_seed: bool = True,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    decoding_t: int = 3,
    output_folder: str = "outputs",
):
    if image.mode == "RGBA":
        image = image.convert("RGB")
    # Match the 1024x576 resolution the img2vid-xt checkpoint expects
    image = resize_image(image, output_size=(1024, 576))
    if randomize_seed:
        seed = random.randint(0, max_64_bit_int)
    snippet_paths = []
    current_image = image
    for _ in range(5):
        # Each iteration starts from the final frame of the previous snippet
        current_image, snippet_path = generate_snippet(
            init_image=current_image,
            seed=seed,
            motion_bucket_id=motion_bucket_id,
            fps_id=fps_id,
            decoding_t=decoding_t,
            output_folder=output_folder,
        )
        snippet_paths.append(snippet_path)
    return combine_videos(snippet_paths), seed
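# NOTE: chaining on re-encoded last frames keeps the joined video continuous,
# but some quality drift can accumulate across the five snippets.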
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("### Stable Video Diffusion - Generate a Long Video")
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Upload an image", type="pil")
            generate_btn = gr.Button("Generate Long Video")
        video_output = gr.Video()
    with gr.Accordion("Advanced Options", open=False):
        seed = gr.Slider(0, max_64_bit_int, value=42, step=1, label="Seed")
        randomize_seed = gr.Checkbox(value=True, label="Randomize Seed")
        motion_bucket_id = gr.Slider(1, 255, value=127, step=1, label="Motion Bucket ID")
        fps_id = gr.Slider(5, 30, value=6, step=1, label="Frames Per Second")

    generate_btn.click(
        sample_long,
        inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id],
        outputs=[video_output, seed],
    )

if __name__ == "__main__":
    demo.launch(share=True)