File size: 5,370 Bytes
9ac31b8 07c1f7a 9ac31b8 3b06696 9ac31b8 3b06696 a9235bb 25d3956 e3d310b 07c1f7a d56d267 36be800 9ac31b8 d56d267 9ac31b8 8010ebe 9ac31b8 d56d267 c907ab3 0cd72ee 53a5202 3b06696 492fffc 3b06696 9ac31b8 d56d267 171a390 3b06696 36be800 9ac31b8 d56d267 9ac31b8 0cd72ee efa319b 9ac31b8 f58ac54 9ac31b8 f58ac54 3b06696 0cd72ee d56d267 3b06696 d56d267 3b06696 d56d267 ff46702 d56d267 d6e8a5f 3b06696 498ebf4 25d3956 0cd72ee 25d3956 f58ac54 c8d4706 53a5202 492fffc 25d3956 c907ab3 9ac31b8 d56d267 3744cdd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import gradio as gr
#import gradio.helpers
import torch
import os
from glob import glob
from pathlib import Path
from typing import Optional
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video
from PIL import Image
import uuid
import random
from huggingface_hub import hf_hub_download
#gradio.helpers.CACHED_FOLDER = '/data/cache'
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
pipe = StableVideoDiffusionPipeline.from_pretrained(
"stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
pipe.to("cuda")
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
#pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
max_64_bit_int = 2**63 - 1
def generate_video(
image: Image,
seed: int,
motion_bucket_id: int = 127,
fps_id: int = 6,
version: str = "svd_xt",
cond_aug: float = 0.02,
decoding_t: int = 3, # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
device: str = "cuda",
output_folder: str = "outputs",
secret_token: str = "",
):
if secret_token != SECRET_TOKEN:
raise gr.Error(
f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
image = resize_image(image)
if image.mode == "RGBA":
image = image.convert("RGB")
generator = torch.manual_seed(seed)
os.makedirs(output_folder, exist_ok=True)
base_count = len(glob(os.path.join(output_folder, "*.mp4")))
video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
export_to_video(frames, video_path, fps=fps_id)
torch.manual_seed(seed)
# Read the content of the video file and encode it to base64
with open(video_path, "rb") as video_file:
video_base64 = base64.b64encode(video_file.read()).decode('utf-8')
# Prepend the appropriate data URI header with MIME type
video_data_uri = 'data:video/mp4;base64,' + video_base64
# Clean up the video file to avoid filling up storage
# os.remove(video_path)
return video_data_uri
def resize_image(image, output_size=(1024, 576)):
# Calculate aspect ratios
target_aspect = output_size[0] / output_size[1] # Aspect ratio of the desired size
image_aspect = image.width / image.height # Aspect ratio of the original image
# Resize then crop if the original image is larger
if image_aspect > target_aspect:
# Resize the image to match the target height, maintaining aspect ratio
new_height = output_size[1]
new_width = int(new_height * image_aspect)
resized_image = image.resize((new_width, new_height), Image.LANCZOS)
# Calculate coordinates for cropping
left = (new_width - output_size[0]) / 2
top = 0
right = (new_width + output_size[0]) / 2
bottom = output_size[1]
else:
# Resize the image to match the target width, maintaining aspect ratio
new_width = output_size[0]
new_height = int(new_width / image_aspect)
resized_image = image.resize((new_width, new_height), Image.LANCZOS)
# Calculate coordinates for cropping
left = 0
top = (new_height - output_size[1]) / 2
right = output_size[0]
bottom = (new_height + output_size[1]) / 2
# Crop the image
cropped_image = resized_image.crop((left, top, right, bottom))
return cropped_image
with gr.Blocks() as demo:
gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
#### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `4s` vid from a single image at (`25 frames` at `6 fps`). this demo uses [🧨 diffusers for low VRAM and fast generation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd).
''')
secret_token = gr.Text(
label='Secret Token',
max_lines=1,
placeholder='Enter your secret token',
)
with gr.Row():
with gr.Column():
image = gr.Image(label="Upload your image", type="pil")
generate_btn = gr.Button("Generate")
base64_out = gr.Textbox(label="Base64 Video")
with gr.Accordion("Advanced options", open=False):
seed = gr.Slider(label="Seed", value=42, randomize=False, minimum=0, maximum=max_64_bit_int, step=1)
motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
generate_btn.click(fn=generate_video, inputs=[image, seed, motion_bucket_id, fps_id, secret_token], outputs=[base64_out], api_name="generate_video")
if __name__ == "__main__":
demo.queue(max_size=20).launch() |