Spaces:

ModularityAI
/

GenVideo

Runtime error

App Files Files Community

GenVideo / app.py

hanzla

sliders added

0413f13 over 1 year ago

raw

history blame

3.82 kB

	import gradio as gr
	import torch
	from diffusers import AnimateDiffPipeline, DDIMScheduler, MotionAdapter
	from diffusers.utils import export_to_gif
	from diffusers.utils import export_to_video
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import uuid
	import spaces

	# Motion-LoRA checkpoints selectable in the UI.  Keys are the labels shown to
	# the user (and reused as adapter names); values are the Hub repositories the
	# weights are loaded from.  All repositories share the same prefix.
	_MOTION_LORA_PREFIX = "guoyww/animatediff-motion-lora-"
	adapter_options = {
	    label: _MOTION_LORA_PREFIX + repo_suffix
	    for label, repo_suffix in (
	        ("zoom-out", "zoom-out"),
	        ("zoom-in", "zoom-in"),
	        ("pan-left", "pan-left"),
	        ("pan-right", "pan-right"),
	        # The "roll" labels map onto repositories named "rolling-*".
	        ("roll-clockwise", "rolling-clockwise"),
	        ("roll-anticlockwise", "rolling-anticlockwise"),
	        ("tilt-up", "tilt-up"),
	        ("tilt-down", "tilt-down"),
	    )
	}

	# --- One-time model setup (runs when the module is loaded) -----------------
	device = "cuda"

	# Motion module that AnimateDiff plugs into the base image model.
	adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2", torch_dtype=torch.float16)
	model_id = "SG161222/Realistic_Vision_V5.1_noVAE"

	# Chat LLM used by generate_video to expand the user's prompt.
	llm_id = "microsoft/Phi-3-mini-128k-instruct"
	model_llm = AutoModelForCausalLM.from_pretrained(
	    llm_id,
	    # Placement is handled by device_map alone.  `device` is not a
	    # from_pretrained argument: unknown keyword arguments are forwarded to
	    # the model constructor, which rejects them.
	    device_map=device,
	    torch_dtype="auto",
	    # NOTE(review): this executes modelling code shipped in the model repo.
	    trust_remote_code=True,
	)
	# Tokenizers hold no tensors, so there is no device to choose here.
	tokenizer_llm = AutoTokenizer.from_pretrained(llm_id)

	pipe = AnimateDiffPipeline.from_pretrained(model_id, motion_adapter=adapter, torch_dtype=torch.float16).to(device)

	# Replace the checkpoint's default scheduler with a DDIM scheduler built from
	# the checkpoint's own scheduler config plus the overrides below.
	scheduler = DDIMScheduler.from_pretrained(
	    model_id,
	    subfolder="scheduler",
	    clip_sample=False,
	    timestep_spacing="linspace",
	    beta_schedule="linear",
	    steps_offset=1,
	)
	pipe.scheduler = scheduler

	@spaces.GPU
	def generate_video(prompt, negative_prompt, guidance_scale, num_inference_steps, adapter_choices):
	    """Render a 16-frame AnimateDiff clip for *prompt* and return the MP4 path.

	    Args:
	        prompt: Text prompt given to the AnimateDiff pipeline.
	        negative_prompt: Negative prompt given to the AnimateDiff pipeline.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_inference_steps: Number of denoising steps; coerced to ``int``
	            in case the UI slider delivers a float.
	        adapter_choices: Keys of ``adapter_options`` naming the motion LoRAs
	            to apply.  May be empty or ``None`` for no motion LoRA.

	    Returns:
	        Path of the generated ``.mp4`` file under ``/tmp``.
	    """
	    pipe.to(device)

	    messages = [
	        {"role": "user", "content": "You have to complete my given prompt into a complete description. The description should be heavily detailed. Feel free to add your own fillers if need. The purpose of this description is to descibe a video generation. My Prompt: " + prompt},
	    ]

	    # model_llm is loaded with device_map, so it is already placed on its
	    # device; asking the pipeline to move it again via `device=` is rejected
	    # by recent transformers releases.
	    pipe_llm = pipeline(
	        "text-generation",
	        model=model_llm,
	        tokenizer=tokenizer_llm,
	    )

	    generation_args = {
	        "max_new_tokens": 500,
	        "return_full_text": False,
	        "temperature": 1,
	        "do_sample": False,
	    }

	    llm_output = pipe_llm(messages, **generation_args)
	    # NOTE(review): the expanded description is only logged; the video
	    # pipeline below still receives the raw user prompt.  Confirm whether
	    # that is intentional before wiring it in.
	    print(llm_output[0]['generated_text'])

	    # Start every request from a LoRA-free pipeline.  Without this, loading
	    # an adapter under a name that is already registered fails on the second
	    # request, and adapters chosen by an earlier request stay active when the
	    # current one selects none.
	    pipe.unload_lora_weights()

	    # Set adapters based on user selection
	    if adapter_choices:
	        for adapter_name in adapter_choices:
	            pipe.load_lora_weights(
	                adapter_options[adapter_name], adapter_name=adapter_name,
	            )
	        pipe.set_adapters(adapter_choices, adapter_weights=[1.0] * len(adapter_choices))
	        print(adapter_choices)

	    result = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        num_frames=16,
	        guidance_scale=guidance_scale,
	        num_inference_steps=int(num_inference_steps),
	    )

	    # Random file name so concurrent requests never overwrite each other.
	    name = uuid.uuid4().hex
	    path = f"/tmp/{name}.mp4"
	    export_to_video(result.frames[0], path, fps=10)
	    return path



	# --- Gradio UI --------------------------------------------------------------
	_ui_theme = gr.themes.Soft(primary_hue="red", secondary_hue="pink")

	# Input widgets, listed in the positional order of generate_video's
	# parameters: prompt, negative prompt, guidance scale, steps, motion LoRAs.
	_ui_inputs = [
	    gr.Textbox(label="Enter your prompt"),
	    gr.Textbox(label="Negative Prompt"),
	    gr.Slider(minimum=0.5, maximum=10, value=7.5, label="Guidance Scale"),
	    gr.Slider(minimum=4, maximum=24, step=4, value=4, label="Inference Steps"),
	    # Checkbox group so that several motion LoRAs can be ticked at once.
	    gr.CheckboxGroup(adapter_options.keys(), label="Adapter Choice", type='value'),
	]

	_ui_output = gr.Video(label="Generated Video")

	iface = gr.Interface(
	    fn=generate_video,
	    inputs=_ui_inputs,
	    outputs=_ui_output,
	    theme=_ui_theme,
	)

	# Start the web server for the app.
	iface.launch()