import uuid

import gradio as gr
import spaces
import torch
from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
from diffusers.utils import export_to_video
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Available motion LoRA adapters (replace with your actual adapter names)
adapter_options = {
    "zoom-out": "guoyww/animatediff-motion-lora-zoom-out",
    "zoom-in": "guoyww/animatediff-motion-lora-zoom-in",
    "pan-left": "guoyww/animatediff-motion-lora-pan-left",
    "pan-right": "guoyww/animatediff-motion-lora-pan-right",
    "roll-clockwise": "guoyww/animatediff-motion-lora-rolling-clockwise",
    "roll-anticlockwise": "guoyww/animatediff-motion-lora-rolling-anticlockwise",
    "tilt-up": "guoyww/animatediff-motion-lora-tilt-up",
    "tilt-down": "guoyww/animatediff-motion-lora-tilt-down",
}

device = "cuda"

# AnimateDiff motion adapter and base text-to-image model
adapter = MotionAdapter.from_pretrained(
    "guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16
)
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"

# Phi-3 is used to expand the user's prompt into a detailed video description
model_llm = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    device_map="cuda",
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer_llm = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")

pipe = AnimateDiffPipeline.from_pretrained(
    model_id, motion_adapter=adapter, torch_dtype=torch.float16
).to(device)
scheduler = DDIMScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    clip_sample=False,
    timestep_spacing="linspace",
    beta_schedule="linear",
    steps_offset=1,
)
pipe.scheduler = scheduler


@spaces.GPU
def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
    pipe.to(device)

    # Expand the user's prompt into a detailed description with Phi-3
    messages = [
        {
            "role": "user",
            "content": (
                "You have to complete my given prompt into a complete description. "
                "The description should be heavily detailed. Feel free to add your own "
                "fillers if needed. The purpose of this description is to describe a "
                "video generation. My Prompt: " + prompt
            ),
        },
    ]
    pipe_llm = pipeline(
        "text-generation",
        model=model_llm,
        tokenizer=tokenizer_llm,
        device_map="auto",
    )
    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,
        "temperature": 1,  # ignored when do_sample=False (greedy decoding)
        "do_sample": False,
    }
    output = pipe_llm(messages, **generation_args)
    # NOTE: the expanded description is only printed; the original prompt is
    # what gets passed to the diffusion pipeline below.
    print(output[0]["generated_text"])

    # Load and activate the motion LoRAs selected by the user
    if adapter_choices:
        for adapter_name in adapter_choices:
            pipe.load_lora_weights(
                adapter_options[adapter_name],
                adapter_name=adapter_name,
            )
        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
        print(adapter_choices)

    # Generate 16 frames and export them as an MP4
    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=16,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
    )
    name = str(uuid.uuid4()).replace("-", "")
    path = f"/tmp/{name}.mp4"
    export_to_video(output.frames[0], path, fps=10)
    return path


iface = gr.Interface(
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="pink"),
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Enter your prompt"),
        gr.Textbox(label="Negative Prompt"),
        gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
        gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
        gr.CheckboxGroup(adapter_options.keys(), label="Adapter Choice", type="value"),  # allows multiple selections
    ],
    outputs=gr.Video(label="Generated Video"),
)

iface.launch()