# NOTE: scraped page header converted to a comment so the file is valid Python.
# Original change note: Remove load_example_video function and update example
# prompts with direct video paths (commit 490f3d3)
import gradio as gr
from diffusers import CogVideoXTransformer3DModel, DiffusionPipeline
from diffusers.utils import export_to_video
import torch
import tempfile
import os
import spaces
# Transformer checkpoints selectable in the UI
TRANSFORMER_MODELS = [
    "sayakpaul/pika-dissolve-v0",
    "finetrainers/crush-smol-v0",
    "finetrainers/3dgs-v0",
    "finetrainers/cakeify-v0"
]

# Trigger word each fine-tuned checkpoint expects at the very start of the prompt
MODEL_PREFIXES = {
    "sayakpaul/pika-dissolve-v0": "PIKA_DISSOLVE",
    "finetrainers/crush-smol-v0": "DIFF_crush",
    "finetrainers/3dgs-v0": "3D_dissolve",
    "finetrainers/cakeify-v0": "PIKA_CAKEIFY"
}


def check_and_fix_prompt(transformer_model, prompt):
    """Ensure *prompt* starts with the trigger prefix required by *transformer_model*.

    Unknown models return the prompt untouched; known models get the prompt
    stripped of surrounding whitespace and, if missing, the prefix prepended.
    """
    prefix = MODEL_PREFIXES.get(transformer_model)
    if prefix is None:
        print(f"No required prefix found for model: {transformer_model}")
        return prompt
    cleaned = prompt.strip()
    if cleaned.startswith(prefix):
        return cleaned
    print(f"Adding required prefix '{prefix}' to prompt")
    return f"{prefix} {cleaned}"
def load_models(transformer_model):
    """Build a CogVideoX pipeline with its transformer swapped for *transformer_model*.

    Downloads both the fine-tuned transformer weights and the THUDM/CogVideoX-5b
    base pipeline in bfloat16, then returns the assembled pipeline.
    """
    print(f"Loading model: {transformer_model}")
    # Fine-tuned transformer replaces the base model's denoiser
    transformer = CogVideoXTransformer3DModel.from_pretrained(
        transformer_model, torch_dtype=torch.bfloat16
    )
    print("Initializing pipeline")
    return DiffusionPipeline.from_pretrained(
        "THUDM/CogVideoX-5b", transformer=transformer, torch_dtype=torch.bfloat16
    )
def save_video(video_frames, fps=25):
    """Export *video_frames* to a temporary .mp4 file and return its path.

    The caller is responsible for the file's lifetime (it is not auto-deleted,
    so Gradio can serve it after this function returns).
    """
    print("Saving video")
    # mkstemp + close instead of an open NamedTemporaryFile: export_to_video
    # reopens the path itself, and holding the handle open while a second
    # writer opens the same path fails on Windows.
    fd, path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)
    export_to_video(video_frames, path, fps=fps)
    return path
@spaces.GPU(duration=600)
def generate_video_pipeline(pipeline, prompt, negative_prompt, num_frames, height, width, num_inference_steps):
    """Run *pipeline* on the GPU (if available) and return the generated frames.

    Decorated with spaces.GPU so Hugging Face Spaces allocates a GPU for up to
    600 seconds. Falls back to CPU when CUDA is unavailable.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Bug fix: torch.cuda.get_device_name raises RuntimeError when CUDA is
    # absent, so only query the device name when a GPU actually exists.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    # Move to appropriate device
    print("Moving to device")
    device = torch.device("cuda" if cuda_available else "cpu")
    pipeline = pipeline.to(device)
    # Generate video
    print("Generating video")
    video_frames = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=num_frames,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps
    ).frames[0]
    print("Video generated")
    return video_frames
def generate_video(transformer_model, prompt, negative_prompt, num_frames, height, width, num_inference_steps):
    """End-to-end generation: fix the prompt, load models, render, save to disk.

    Returns the filesystem path of the exported .mp4 for Gradio to display.
    """
    print(f"Original prompt: {prompt}")
    # Prepend the model's trigger prefix when the user omitted it
    final_prompt = check_and_fix_prompt(transformer_model, prompt)
    print(f"Final prompt: {final_prompt}")

    pipeline = load_models(transformer_model)

    frames = generate_video_pipeline(
        pipeline,
        final_prompt,
        negative_prompt,
        num_frames,
        height,
        width,
        num_inference_steps,
    )

    print("Saving video")
    return save_video(frames)
def create_interface():
    """Create and configure the Gradio interface.

    Builds a two-column Blocks layout: model/prompt inputs plus advanced
    sliders on the left, the generated video on the right. Returns the
    un-launched gr.Blocks app.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# CogVideoX Video Generator")
        with gr.Row():
            with gr.Column():
                # Inputs
                model_dropdown = gr.Dropdown(
                    choices=TRANSFORMER_MODELS,
                    value=TRANSFORMER_MODELS[0],
                    label="Transformer Model"
                )
                prompt_input = gr.Textbox(
                    lines=5,
                    label="Prompt",
                    placeholder="Describe the video you want to generate..."
                )
                negative_prompt_input = gr.Textbox(
                    lines=2,
                    label="Negative Prompt",
                    value="inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs"
                )
                # Generation knobs hidden by default to keep the UI simple
                with gr.Accordion("Advanced Parameters", open=False):
                    num_frames = gr.Slider(
                        minimum=8,
                        maximum=128,
                        value=50,
                        step=1,
                        label="Number of Frames",
                        info="Number of frames in the video"
                    )
                    height = gr.Slider(
                        minimum=32,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Height",
                        info="Video height in pixels"
                    )
                    width = gr.Slider(
                        minimum=32,
                        maximum=1024,
                        value=512,
                        step=64,
                        label="Width",
                        info="Video width in pixels"
                    )
                    num_inference_steps = gr.Slider(
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=1,
                        label="Inference Steps",
                        info="Higher number = better quality but slower"
                    )
                generate_btn = gr.Button("Generate Video")
            with gr.Column():
                # Output
                video_output = gr.Video(label="Generated Video")
        # Add examples. Each row is (model, prompt, negative prompt,
        # frames, height, width, steps, example output video path).
        # NOTE(review): video_output is listed among Examples inputs —
        # presumably so clicking an example shows the pre-rendered video
        # from example_outputs/; confirm these files exist in the repo.
        gr.Examples(
            examples=[
                [
                    "sayakpaul/pika-dissolve-v0",
                    "PIKA_DISSOLVE A slender glass vase, brimming with tiny white pebbles, stands centered on a polished ebony dais. Without warning, the glass begins to dissolve from the edges inward. Wisps of translucent dust swirl upward in an elegant spiral, illuminating each pebble as they drop onto the dais. The gently drifting dust eventually settles, leaving only the scattered stones and faint traces of shimmering powder on the stage.",
                    "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
                    50, 512, 512, 50,
                    "example_outputs/pika-dissolve-v0.mp4"
                ],
                [
                    "finetrainers/crush-smol-v0",
                    "DIFF_crush A thick burger is placed on a dining table, and a large metal cylinder descends from above, crushing the burger as if it were under a hydraulic press. The bulb is crushed, leaving a pile of debris around it.",
                    "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
                    50, 512, 512, 50,
                    "example_outputs/crush-smol-v0.mp4"
                ],
                [
                    "finetrainers/3dgs-v0",
                    "3D_dissolve In a 3D appearance, a bookshelf filled with books is surrounded by a burst of red sparks, creating a dramatic and explosive effect against a black background.",
                    "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
                    50, 512, 512, 50,
                    "example_outputs/3dgs-v0.mp4"
                ],
                [
                    "finetrainers/cakeify-v0",
                    "PIKA_CAKEIFY On a gleaming glass display stand, a sleek black purse quietly commands attention. Suddenly, a knife appears and slices through the shoe, revealing a fluffy vanilla sponge at its core. Immediately, it turns into a hyper-realistic prop cake, delighting the senses with its playful juxtaposition of the everyday and the extraordinary.",
                    "inconsistent motion, blurry motion, worse quality, degenerate outputs, deformed outputs",
                    50, 512, 512, 50,
                    "example_outputs/cakeify-v0.mp4"
                ]
            ],
            inputs=[
                model_dropdown,
                prompt_input,
                negative_prompt_input,
                num_frames,
                height,
                width,
                num_inference_steps,
                video_output,
            ],
            label="Prompt Examples"
        )
        # Connect the function: clicking the button runs the full
        # generation pipeline and shows the resulting .mp4
        generate_btn.click(
            fn=generate_video,
            inputs=[
                model_dropdown,
                prompt_input,
                negative_prompt_input,
                num_frames,
                height,
                width,
                num_inference_steps
            ],
            outputs=video_output
        )
    return demo
# Entry point: build the UI and start the Gradio server
if __name__ == "__main__":
    app = create_interface()
    app.launch()