import gradio as gr
import torch
from diffusers import I2VGenXLPipeline
from diffusers.utils import export_to_gif, load_image
import tempfile
import spaces


def initialize_pipeline():
    # Load the I2VGen-XL pipeline in half precision; it is moved to the GPU
    # inside the GPU-decorated generation function below.
    pipeline = I2VGenXLPipeline.from_pretrained(
        "ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16"
    )
    return pipeline


@spaces.GPU
def generate_gif(prompt, image, negative_prompt, num_inference_steps, guidance_scale, seed):
    # Initialize the pipeline within the function and move it to the GPU
    pipeline = initialize_pipeline().to("cuda")

    # Set the generator seed (gr.Number returns a float, so cast to int)
    generator = torch.Generator().manual_seed(int(seed))

    # Use image conditioning when an image is provided, otherwise generate from text only
    if image is not None:
        image = load_image(image).convert("RGB")
        frames = pipeline(
            prompt=prompt,
            image=image,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            generator=generator,
        ).frames[0]
    else:
        frames = pipeline(
            prompt=prompt,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            guidance_scale=guidance_scale,
            generator=generator,
        ).frames[0]

    # Export the generated frames to a temporary GIF file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".gif") as tmp_gif:
        gif_path = tmp_gif.name
    export_to_gif(frames, gif_path)
    return gif_path


# Create the Gradio interface with tabs
with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("Generate from Text"):
            with gr.Row():
                with gr.Column():
                    text_prompt = gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt")
                    text_negative_prompt = gr.Textbox(lines=2, placeholder="Enter your negative prompt here...", label="Negative Prompt")
                    text_num_inference_steps = gr.Slider(1, 100, step=1, value=50, label="Number of Inference Steps")
                    text_guidance_scale = gr.Slider(1, 20, step=0.1, value=9.0, label="Guidance Scale")
                    text_seed = gr.Number(label="Seed", value=8888)
                    text_generate_button = gr.Button("Generate GIF")
                with gr.Column():
                    # Use gr.Image so the animated GIF renders in the browser
                    text_output_gif = gr.Image(label="Generated GIF")
            # Gradio input lists cannot contain None, so wrap the call to pass image=None for this tab
            text_generate_button.click(
                fn=lambda prompt, negative_prompt, steps, scale, seed: generate_gif(prompt, None, negative_prompt, steps, scale, seed),
                inputs=[text_prompt, text_negative_prompt, text_num_inference_steps, text_guidance_scale, text_seed],
                outputs=text_output_gif,
            )
        with gr.TabItem("Generate from Image"):
            with gr.Row():
                with gr.Column():
                    image_prompt = gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt")
                    image_input = gr.Image(type="filepath")