import gradio as gr
import torch
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
from diffusers.utils import export_to_video
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import uuid
import spaces

# Motion LoRA adapters selectable in the UI (keys are the checkbox labels)
adapter_options = {
    "zoom-out":"guoyww/animatediff-motion-lora-zoom-out",
    "zoom-in":"guoyww/animatediff-motion-lora-zoom-in",
    "pan-left":"guoyww/animatediff-motion-lora-pan-left",
    "pan-right":"guoyww/animatediff-motion-lora-pan-right",
    "roll-clockwise":"guoyww/animatediff-motion-lora-rolling-clockwise",
    "roll-anticlockwise":"guoyww/animatediff-motion-lora-rolling-anticlockwise",
    "tilt-up":"guoyww/animatediff-motion-lora-tilt-up",
    "tilt-down":"guoyww/animatediff-motion-lora-tilt-down"
}

device = "cuda"
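# Load the AnimateDiff motion adapter in fp16 and pick an SD 1.5-based checkpoint (Realistic Vision) as the base model.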
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"

# Phi-3-mini expands the user's short prompt into a detailed scene description.
model_llm = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer_llm = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

pipe = AnimateDiffPipeline.from_pretrained(model_id, motion_adapter=adapter, torch_dtype=torch.float16).to(device)
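# Use the DDIM scheduler settings recommended for AnimateDiff (linear betas, linspace timestep spacing).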
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
pipe.scheduler = scheduler

@spaces.GPU
def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
    # Make sure the diffusion pipeline is on the GPU allocated for this call.
    pipe.to(device)

    # Ask the LLM to flesh out the user's prompt into a detailed description of the video to generate.
    messages = [
        {"role": "user", "content": "You have to complete my given prompt into a complete description. The description should be heavily detailed. Feel free to add your own fillers if needed. The purpose of this description is to describe a video generation. My Prompt: " + prompt},
    ]

    pipe_llm = pipeline(
        "text-generation",
        model=model_llm,
        tokenizer=tokenizer_llm,
    )

    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,
        "do_sample": False,  # greedy decoding, so temperature has no effect and is omitted
    }

    # Use the LLM-expanded description as the prompt for video generation.
    output = pipe_llm(messages, **generation_args)
    prompt = output[0]['generated_text']
    print(prompt)

    # Load and activate the motion LoRAs selected by the user.
    if adapter_choices:
        for adapter_name in adapter_choices:
            pipe.load_lora_weights(
                adapter_options[adapter_name], adapter_name=adapter_name,
            )
        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
        print(adapter_choices)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=16,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    )
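    # Export the generated frames to an MP4 under /tmp with a random filename.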
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path



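# Gradio UI: the inputs below map one-to-one onto generate_video's arguments.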
iface = gr.Interface(
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"),
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
        gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
        gr.CheckboxGroup(list(adapter_options.keys()), label="Adapter Choice", type="value"),  # multiple motion LoRAs can be selected
    ],
    outputs=gr.Video(label="Generated Video"),
)

iface.launch()