import gradio as gr
import torch
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
from diffusers.utils import export_to_video
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uuid
import spaces
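# This Space chains two models: a Phi-3 LLM first expands the user's short
# prompt into a detailed scene description, then AnimateDiff renders that
# description as a 16-frame video clip.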
# Available adapters (replace with your actual adapter names)
adapter_options = {
    "zoom-out": "guoyww/animatediff-motion-lora-zoom-out",
    "zoom-in": "guoyww/animatediff-motion-lora-zoom-in",
    "pan-left": "guoyww/animatediff-motion-lora-pan-left",
    "pan-right": "guoyww/animatediff-motion-lora-pan-right",
    "roll-clockwise": "guoyww/animatediff-motion-lora-rolling-clockwise",
    "roll-anticlockwise": "guoyww/animatediff-motion-lora-rolling-anticlockwise",
    "tilt-up": "guoyww/animatediff-motion-lora-tilt-up",
    "tilt-down": "guoyww/animatediff-motion-lora-tilt-down",
}
device = "cuda"
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
model_llm = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer_llm = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
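# Assemble the AnimateDiff pipeline around the Realistic Vision base model,
# then swap in a DDIM scheduler with the linear-beta / linspace settings
# commonly recommended for AnimateDiff.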
pipe = AnimateDiffPipeline.from_pretrained(model_id, motion_adapter=adapter, torch_dtype=torch.float16).to(device)
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
pipe.scheduler = scheduler
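# @spaces.GPU requests a GPU for the duration of each call on ZeroGPU Spaces.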
@spaces.GPU
def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
    pipe.to(device)

    # Ask the LLM to expand the short user prompt into a detailed description.
    messages = [
        {"role": "user", "content": "You have to expand my given prompt into a complete description. The purpose of this description is to describe a video generation. Follow the order of my prompt. My Prompt: " + prompt},
    ]
    pipe_llm = pipeline(
        "text-generation",
        model=model_llm,
        tokenizer=tokenizer_llm,
        device_map="auto",
    )
    generation_args = {
        "max_new_tokens": 512,
        "return_full_text": False,
        "temperature": 0.0,
        "do_sample": False,
    }
    llm_output = pipe_llm(messages, **generation_args)
    expanded_prompt = llm_output[0]["generated_text"]
    print(expanded_prompt)

    # Set motion LoRA adapters based on the user's selection. Unload any
    # previously loaded LoRAs first so repeated calls don't fail on a
    # duplicate adapter_name.
    pipe.unload_lora_weights()
    if adapter_choices:
        for adapter_name in adapter_choices:
            pipe.load_lora_weights(
                adapter_options[adapter_name], adapter_name=adapter_name,
            )
        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
        print(adapter_choices)

    # Render a 16-frame clip from the expanded prompt.
    output = pipe(
        prompt=expanded_prompt,
        negative_prompt=negative_prompt,
        num_frames=16,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    )

    # Write the frames to a uniquely named MP4 and return the path for Gradio.
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path
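# Gradio UI: prompt and negative-prompt text boxes, sliders for guidance scale
# and step count, and a checkbox group for combining motion LoRAs.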
iface = gr.Interface(
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"),
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
        gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
        gr.CheckboxGroup(list(adapter_options.keys()), label="Adapter Choice", type="value"),  # multiple selections allowed
    ],
    outputs=gr.Video(label="Generated Video"),
)
iface.launch()
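# Local smoke test, bypassing the UI (hypothetical values; assumes a CUDA GPU):
# print(generate_video("a rocket launching at dawn", "blurry, low quality",
#                      7.5, 8, ["zoom-out"]))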