# Instant⚡Video

import gradio as gr
import torch
import os
import spaces
import uuid

from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from diffusers.utils import export_to_video
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from PIL import Image

# Constants
# Style label shown in the UI -> Hugging Face repo id of the base checkpoint.
bases = dict(
    [
        ("Cartoon", "frankjoshua/toonyou_beta6"),
        ("Realistic", "emilianJR/epiCRealism"),
        ("3d", "Lykon/DreamShaper"),
        ("Anime", "Yntec/mistoonAnime2"),
    ]
)

# Camera-motion label -> repo id. "Default" carries no repo (None); every
# other entry shares the same repo-id prefix and differs only in its slug.
motion_models = {"Default": None}
motion_models.update(
    (label, f"guoyww/animatediff-motion-lora-{slug}")
    for label, slug in (
        ("Zoom in", "zoom-in"),
        ("Zoom out", "zoom-out"),
        ("Tilt up", "tilt-up"),
        ("Tilt down", "tilt-down"),
        ("Pan left", "pan-left"),
        ("Pan right", "pan-right"),
        ("Roll left", "rolling-anticlockwise"),
        ("Roll right", "rolling-clockwise"),
    )
)


# Preload models
# Everything below runs once at import time: the app refuses to start without
# CUDA, then loads every base pipeline and motion model onto the GPU.
if not torch.cuda.is_available():
    raise NotImplementedError("No GPU detected!")

device = "cuda"
dtype = torch.float16


def _build_pipeline(repo_id):
    """Load the AnimateDiff pipeline for ``repo_id`` onto the GPU and swap in an Euler scheduler."""
    loaded = AnimateDiffPipeline.from_pretrained(repo_id, torch_dtype=dtype)
    loaded = loaded.to(device)
    # Rebuild the scheduler from the pipeline's own config, overriding only
    # the timestep spacing and beta schedule.
    loaded.scheduler = EulerDiscreteScheduler.from_config(
        loaded.scheduler.config,
        timestep_spacing="trailing",
        beta_schedule="linear",
    )
    return loaded


# One ready-to-use pipeline per base-model label.
pipes = {label: _build_pipeline(repo_id) for label, repo_id in bases.items()}

# Load motion models
# Each repo id stored in ``motion_models`` is replaced in place by the loaded
# model; the "Default" entry has no repo and stays None.
# NOTE(review): the repo ids are named "...-motion-lora-..."; confirm that
# MotionAdapter.from_pretrained is the right loader for them.
for label in [key for key, source in motion_models.items() if source]:
    motion_models[label] = MotionAdapter.from_pretrained(
        motion_models[label], torch_dtype=dtype
    ).to(device)


# Function 
@spaces.GPU(duration=60,queue=False)
def generate_image(prompt, base="Realistic", motion="Default", step=8, progress=gr.Progress()):
    """Render a short video for ``prompt`` and return the path of the MP4 file.

    Args:
        prompt: Text description; it is embedded as "<base> image of <prompt>".
        base: Key into ``pipes`` selecting which preloaded pipeline to use.
        motion: Key into ``motion_models``. "Default" (or an empty/None
            value) selects no motion adapter.
        step: Number of inference steps; it also selects which
            AnimateDiff-Lightning checkpoint file is loaded into the UNet.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Path of the exported ``.mp4`` file under ``/tmp``.

    Raises:
        gr.Error: If ``base`` or ``motion`` is not a known key.
    """
    if base not in pipes:
        raise gr.Error(f"Unknown base model: {base!r}")
    pipe = pipes[base]

    # An empty/None selection is treated the same as "Default".
    motion = motion or "Default"
    # NOTE(review): only the pipeline attribute is reassigned here; confirm
    # the pipeline actually picks the adapter up at call time.
    if motion == "Default":
        pipe.motion_adapter = None
    elif motion in motion_models:
        pipe.motion_adapter = motion_models[motion]
    else:
        raise gr.Error(f"Unknown motion: {motion!r}")

    # The step-specific Lightning weights are (re)loaded into the UNet on
    # every call. Prefer the local Hugging Face cache; if the cached lookup
    # fails for any reason, fall back to a regular download.
    repo = "ByteDance/AnimateDiff-Lightning"
    ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"
    try:
        ckpt_path = hf_hub_download(repo, ckpt, local_files_only=True)
    except Exception:
        ckpt_path = hf_hub_download(repo, ckpt)
    pipe.unet.load_state_dict(load_file(ckpt_path, device=device), strict=False)

    # Generate the frames and write them out as a 10 fps video.
    output = pipe(prompt=f"{base} image of {prompt}", guidance_scale=1.2, num_inference_steps=step)

    # uuid4().hex is the UUID without dashes, giving a unique file name.
    path = f"/tmp/{uuid.uuid4().hex}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path


# Gradio Interface
# Layout: header, one input group (prompt, base model, motion, steps, submit),
# the video output, the event wiring, and a list of example prompts.
with gr.Blocks(css="style.css") as demo:
    # Header text; every opened tag is closed so the page renders as intended.
    gr.HTML(
        "<h1><center>Instant⚡Video</center></h1>" +
        "<p><center><span style='color: red;'>You may change the steps from 4 to 8, if you didn't get satisfied results.</span></center></p>" +
        "<p><center><strong>First Video Generating takes time then Videos generate faster.</strong></center></p>" +
        "<p><center>To get best results Make Sure to Write prompts in style as Given in Examples</center></p>" +
        "<p><a href='https://huggingface.co/spaces/KingNish/Instant-Video/discussions/1' >Must Share you Best Results with Community - Click HERE</a></p>"
    )
    with gr.Group():
        with gr.Row():
            prompt = gr.Textbox(
                label='Prompt'
            )
        with gr.Row():
            # Choices come straight from the keys generate_image looks up in
            # ``pipes``, so the UI and the lookup cannot drift apart.
            select_base = gr.Dropdown(
                label='Base model',
                choices=list(bases),
                value="Realistic",
                interactive=True
            )
            # generate_image indexes ``motion_models`` by label, so the
            # dropdown must send labels, not repo ids.
            select_motion = gr.Dropdown(
                label='Motion',
                choices=list(motion_models),
                value="Zoom in",
                interactive=True
            )
            select_step = gr.Dropdown(
                label='Inference steps',
                choices=[
                    ('1-Step', 1),
                    ('2-Step', 2),
                    ('4-Step', 4),
                    ('8-Step', 8),
                ],
                value=4,
                interactive=True
            )
            submit = gr.Button(
                scale=1,
                variant='primary'
            )
    video = gr.Video(
        label='AnimateDiff-Lightning',
        autoplay=True,
        height=512,
        width=512,
        elem_id="video_output"
    )

    # Clicking the button or pressing Enter in the prompt box both run
    # generate_image with the current selections.
    gr.on(triggers=[
            submit.click,
            prompt.submit
    ],
        fn = generate_image,
        inputs = [prompt, select_base, select_motion, select_step],
        outputs = [video],
        api_name = "instant_video",
        queue = False
    )

    # Each example is a one-element list matching ``inputs=[prompt]``; the
    # remaining generate_image arguments fall back to their defaults.
    gr.Examples(
        examples=[
            ["Focus: Eiffel Tower (Animate: Clouds moving)"], #Atmosphere Movement Example
            ["Focus: Trees In forest (Animate: Lion running)"], #Object Movement Example
            ["Focus: Astronaut in Space"], #Normal
            ["Focus: Group of Birds in sky (Animate:  Birds Moving) (Shot From distance)"], #Camera distance
            ["Focus:  Statue of liberty (Shot from Drone) (Animate: Drone coming toward statue)"], #Camera Movement
            ["Focus: Panda in Forest (Animate: Drinking Tea)"], #Doing Something
            ["Focus: Kids Playing (Season: Winter)"], #Atmosphere or Season
            ["Focus: Cars in Street (Season: Rain, Daytime) (Shot from Distance) (Movement: Cars running)"], #Mixture
        ],
        fn=generate_image,
        inputs=[prompt],
        outputs=[video],
        cache_examples="lazy",
    )

# Turn on the request queue and start serving the app.
demo.queue().launch()