import spaces
from datetime import datetime
import gc
import gradio as gr
import numpy as np
import random
import os
from diffusers import AutoencoderKLLTXVideo, LTXPipeline, LTXVideoTransformer3DModel
from diffusers.utils import export_to_video
from transformers import T5EncoderModel, T5Tokenizer
import torch
from utils import install_packages  # local helper module bundled with the Space
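# Inference-only global settings: TF32 matmuls speed up Ampere+ GPUs at a
# negligible precision cost, the TorchScript JIT is disabled, and autograd is
# turned off globally since this app never trains.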
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.jit._state.disable()
torch.set_grad_enabled(False)
gc.collect()
torch.cuda.empty_cache()
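# Model sources: the diffusers-format repo provides the T5 text encoder and
# tokenizer, while the transformer and VAE come from Lightricks' single-file
# checkpoint; everything is loaded in bfloat16 to halve memory use.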
ckpt_path = "a-r-r-o-w/LTX-Video-0.9.1-diffusers"
single_file_url = "https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors"
transformer = LTXVideoTransformer3DModel.from_single_file(
single_file_url, torch_dtype=torch.bfloat16
)
vae = AutoencoderKLLTXVideo.from_single_file(
    single_file_url, torch_dtype=torch.bfloat16
)
vae.eval()
vae = vae.to("cuda")
text_encoder = T5EncoderModel.from_pretrained(
ckpt_path,
subfolder="text_encoder",
torch_dtype=torch.bfloat16
)
text_encoder.eval()
text_encoder = text_encoder.to("cuda")
tokenizer = T5Tokenizer.from_pretrained(
ckpt_path,
subfolder="tokenizer"
)
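# Build the pipeline around the components loaded above; the text encoder and
# tokenizer are not part of the single-file checkpoint, so they must be
# supplied explicitly.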
pipeline = LTXPipeline.from_single_file(
single_file_url,
transformer=transformer,
text_encoder=text_encoder,
tokenizer=tokenizer,
vae=vae,
torch_dtype=torch.bfloat16
)
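# Tiled and sliced VAE decoding trades a little speed for much lower peak VRAM
# when decoding the video latents; CPU offload is left disabled because the
# whole pipeline is kept on the GPU.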
# pipeline.enable_model_cpu_offload()
pipeline.vae.enable_tiling()
pipeline.vae.enable_slicing()
pipeline = pipeline.to("cuda")
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1280
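# On ZeroGPU Spaces, @spaces.GPU attaches a GPU to the process only for the
# duration of each call to the decorated function.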
@spaces.GPU()
def infer(
prompt,
negative_prompt,
seed,
randomize_seed,
width=704,
height=448,
num_frames=129,
fps=24,
num_inference_steps=30,
progress=gr.Progress(track_tqdm=True),
):
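    # Draw a fresh seed when requested, then seed a CUDA generator so that a
    # given seed reproduces the same video.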
if randomize_seed:
seed = random.randint(0, MAX_SEED)
generator = torch.Generator(device='cuda').manual_seed(seed)
    with torch.autocast("cuda", dtype=torch.bfloat16), torch.inference_mode():
video = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
width=width,
height=height,
num_frames=num_frames,
# guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
# decode_timestep=decode_timestep,
# decode_noise_scale=decode_noise_scale,
generator=generator,
# max_sequence_length=512,
).frames[0]
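    # Write the frames to ./output/<timestamp>.mp4, then release memory before
    # the ZeroGPU allocation is returned.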
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"output_{timestamp}.mp4"
os.makedirs("output", exist_ok=True)
output_path = f"./output/{filename}"
export_to_video(video, output_path, fps=fps)
    gc.collect()
torch.cuda.empty_cache()
return output_path
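# Single-column Gradio UI: prompt boxes and a run button up top, the result
# video below, and advanced controls tucked into an accordion.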
css = """
#col-container {
margin: 0 auto;
max-width: 640px;
}
"""
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="col-container"):
        gr.Markdown("# LTX-Video Text-to-Video")
with gr.Row():
prompt = gr.Textbox(
label="Prompt",
lines=3,
value=str("A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"),
)
negative_prompt = gr.Textbox(
label="Negative prompt",
lines=3,
value=str("worst quality, blurry, distorted"),
)
with gr.Row():
run_button = gr.Button("Run", scale=0, variant="huggingface")
with gr.Row():
result = gr.Video(label="Result", show_label=False)
with gr.Accordion("Advanced Settings", open=False):
seed = gr.Slider(
label="Seed",
minimum=0,
maximum=MAX_SEED,
step=1,
value=0,
)
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
with gr.Row():
width = gr.Slider(
label="Width",
minimum=256,
maximum=MAX_IMAGE_SIZE,
step=32,
                    value=704,  # LTX-Video expects width divisible by 32
)
height = gr.Slider(
label="Height",
minimum=256,
maximum=MAX_IMAGE_SIZE,
step=32,
                    value=448,  # LTX-Video expects height divisible by 32
)
with gr.Row():
num_frames = gr.Slider(
label="Number of frames",
minimum=1,
maximum=257,
step=32,
                    value=129,  # LTX-Video expects num_frames of the form 8k + 1
)
fps = gr.Slider(
label="Number of frames per second",
minimum=1,
maximum=30,
step=1,
                    value=24,  # LTX-Video is trained to generate 24 fps video
)
with gr.Row():
num_inference_steps = gr.Slider(
label="Number of inference steps",
minimum=1,
maximum=50,
step=1,
                    value=30,  # more steps improve quality at the cost of runtime
)
gr.on(
triggers=[run_button.click, prompt.submit],
fn=infer,
inputs=[
prompt,
negative_prompt,
seed,
randomize_seed,
width,
height,
num_frames,
fps,
num_inference_steps,
],
outputs=[result],
)
if __name__ == "__main__":
install_packages()
demo.launch() |