Spaces:

roll-ai
/

EPiC

Paused

EPiC / gradio_app.py

Muhammad Taqi Raza

GPU setting

1aab056 4 days ago

12.4 kB

	import os
	import subprocess
	from datetime import datetime
	from pathlib import Path
	import gradio as gr
	import numpy as np
	import os
	from spaces import GPU
	# -----------------------------
	# Cache and checkpoint locations
	# -----------------------------
	# Both directories live under the app folder and are created on import
	# if they are missing.
	HF_HOME = "/home/user/app/hf_cache"
	PRETRAINED_DIR = "/home/user/app/pretrained"

	# Point both Hugging Face cache variables at the same directory.
	os.environ.update(HF_HOME=HF_HOME, TRANSFORMERS_CACHE=HF_HOME)

	# Disabled downloads, kept for reference:
	# hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
	# snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")

	os.makedirs(HF_HOME, exist_ok=True)
	os.makedirs(PRETRAINED_DIR, exist_ok=True)

	# -----------------------------

	# Step 0: Optional Model Download
	# -----------------------------
	def download_models():
	    """Download the pretrained checkpoints unless they are already present.

	    The RAFT weights file under ``PRETRAINED_DIR`` is used as the marker:
	    if it exists nothing is downloaded. Otherwise the bundled shell script
	    ``download/download_models.sh`` (relative to the current working
	    directory) is run with bash. A non-zero exit status is reported on
	    stdout and not re-raised.
	    """
	    marker_file = Path(PRETRAINED_DIR) / "RAFT/raft-things.pth"
	    if marker_file.exists():
	        print("✅ Pretrained models already exist.")
	        return

	    print("⚙️ Downloading pretrained models...")
	    try:
	        subprocess.check_call(["bash", "download/download_models.sh"])
	    except subprocess.CalledProcessError as e:
	        print(f"Model download failed: {e}")
	    else:
	        print("✅ Models downloaded.")


	# -----------------------------
	# Step 1: Get Anchor Video
	# -----------------------------
	def get_anchor_video(video_path, fps, num_frames, target_pose, mode,
	                     radius_scale, near_far_estimated,
	                     sampler_name, diffusion_guidance_scale, diffusion_inference_steps,
	                     prompt, negative_prompt, refine_prompt,
	                     depth_inference_steps, depth_guidance_scale,
	                     window_size, overlap, max_res, sample_size,
	                     seed_input, height, width, aspect_ratio_inputs,
	                     init_dx, init_dy, init_dz):
	    """Run the Step 1 anchor-video script and return ``(video, logs)``.

	    The uploaded video is copied to a fixed temp path and
	    ``inference/v2v_data/inference.py`` is launched as a subprocess with the
	    given settings.

	    Args:
	        video_path: Path of the uploaded video. When empty, a file already
	            present at the temp path (from an earlier run) is reused.
	        target_pose: Five whitespace-separated values, "θ φ r x y".
	        sample_size: Two comma-separated values ("height, width").
	        aspect_ratio_inputs: Two comma-separated values, e.g. "2,3".
	        prompt, negative_prompt, refine_prompt: Text prompts; empty or
	            missing values are forwarded as empty strings.
	        Remaining arguments: converted with ``str()`` (``mode`` and
	            ``sampler_name`` are passed as-is) and forwarded to the
	            script's command-line options of matching name.

	    Returns:
	        A 2-tuple matching the two Gradio outputs (video, logs):
	        ``(output_video_path, logs)`` on success, or ``(None, message)``
	        when an input is malformed, the copy fails, the subprocess exits
	        with an error, or the expected output file is missing.
	    """
	    import shutil  # local import: shutil is not among the file-level imports

	    temp_input_path = "/home/user/app/temp_input.mp4"
	    output_dir = "/home/user/app/output_anchor"
	    video_output_path = f"{output_dir}/masked_videos/output.mp4"

	    # Validate the free-text fields before touching the filesystem so a typo
	    # yields a readable message instead of an unhandled exception.
	    pose_values = (target_pose or "").split()
	    if len(pose_values) != 5:
	        return None, "Invalid target pose format. Use: θ φ r x y"
	    theta, phi, r, x, y = pose_values

	    ratio_values = [part.strip() for part in (aspect_ratio_inputs or "").split(",")]
	    if len(ratio_values) != 2 or not all(ratio_values):
	        return None, "Invalid target aspect ratio format. Use two comma-separated values, e.g. 2,3"
	    w, h = ratio_values

	    size_values = [part.strip() for part in (sample_size or "").split(",")]
	    if len(size_values) != 2 or not all(size_values):
	        return None, "Invalid sample size format. Use: height, width (e.g., 384, 672)"
	    h_s, w_s = size_values

	    if video_path:
	        # Copy without going through a shell: the upload path is untrusted
	        # and may contain quotes or other shell metacharacters.
	        try:
	            shutil.copy(video_path, temp_input_path)
	        except OSError as e:
	            return None, f"Failed to copy input video: {e}"
	    elif not os.path.exists(temp_input_path):
	        return None, "No input video provided. Upload a video before running Step 1."

	    logs = f"Running inference with target pose: θ={theta}, φ={phi}, r={r}, x={x}, y={y}\n"

	    command = [
	        "python", "/home/user/app/inference/v2v_data/inference.py",
	        "--video_path", temp_input_path,
	        "--stride", "1",
	        "--out_dir", output_dir,
	        "--radius_scale", str(radius_scale),
	        "--camera", "target",
	        "--mask",
	        "--target_pose", theta, phi, r, x, y,
	        "--video_length", str(num_frames),
	        "--save_name", "output",
	        "--mode", mode,
	        "--fps", str(fps),
	        "--depth_inference_steps", str(depth_inference_steps),
	        "--depth_guidance_scale", str(depth_guidance_scale),
	        "--near_far_estimated", str(near_far_estimated),
	        "--sampler_name", sampler_name,
	        "--diffusion_guidance_scale", str(diffusion_guidance_scale),
	        "--diffusion_inference_steps", str(diffusion_inference_steps),
	        # subprocess rejects None arguments, so normalise all three prompts.
	        "--prompt", prompt or "",
	        "--negative_prompt", negative_prompt or "",
	        "--refine_prompt", refine_prompt or "",
	        "--window_size", str(window_size),
	        "--overlap", str(overlap),
	        "--max_res", str(max_res),
	        "--sample_size", h_s, w_s,
	        "--seed", str(seed_input),
	        "--height", str(height),
	        "--width", str(width),
	        "--target_aspect_ratio", w, h,
	        "--init_dx", str(init_dx),
	        "--init_dy", str(init_dy),
	        "--init_dz", str(init_dz),
	    ]

	    try:
	        result = subprocess.run(command, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        logs += f"Inference failed:\n{e.stderr}{e.stdout}"
	        return None, logs
	    logs += result.stdout

	    # Mirror the Step 2 handler: only hand a path to the video component
	    # when the file was actually produced.
	    if not os.path.exists(video_output_path):
	        logs += f"\nExpected output video not found: {video_output_path}"
	        return None, logs
	    return video_output_path, logs
	# -----------------------------
	# Step 2: Run Inference
	# -----------------------------
	@GPU
	def inference(
	    fps, num_frames, controlnet_weights, controlnet_guidance_start,
	    controlnet_guidance_end, guidance_scale, num_inference_steps, dtype,
	    seed, height, width, downscale_coef, vae_channels,
	    controlnet_input_channels, controlnet_transformer_num_layers
	):
	    """Run the Step 2 script and return ``(video, logs)``.

	    Launches ``inference/cli_demo_camera_i2v_pcd.py`` as a subprocess over the
	    Step 1 output directory, forwarding every argument as the command-line
	    option of matching name (all stringified except ``dtype``, which is
	    passed through unchanged).

	    Returns:
	        ``(path, stdout)`` when the subprocess succeeds and the file
	        ``00000_<seed>_out.mp4`` exists in the output directory;
	        ``(None, stdout)`` when it succeeds but the file is missing;
	        ``(None, error_report)`` when the subprocess exits with an error.
	    """
	    out_dir = "/home/user/app/output"

	    # (flag, value) pairs in the order they appear on the command line.
	    cli_options = [
	        ("--video_root_dir", "/home/user/app/output_anchor"),
	        ("--base_model_path", "/home/user/app/pretrained/CogVideoX-5b-I2V"),
	        ("--controlnet_model_path", "/home/user/app/out/EPiC_pretrained/checkpoint-500.pt"),
	        ("--output_path", out_dir),
	        ("--controlnet_weights", str(controlnet_weights)),
	        ("--controlnet_guidance_start", str(controlnet_guidance_start)),
	        ("--controlnet_guidance_end", str(controlnet_guidance_end)),
	        ("--guidance_scale", str(guidance_scale)),
	        ("--num_inference_steps", str(num_inference_steps)),
	        ("--dtype", dtype),
	        ("--seed", str(seed)),
	        ("--height", str(height)),
	        ("--width", str(width)),
	        ("--num_frames", str(num_frames)),
	        ("--fps", str(fps)),
	        ("--downscale_coef", str(downscale_coef)),
	        ("--vae_channels", str(vae_channels)),
	        ("--controlnet_input_channels", str(controlnet_input_channels)),
	        ("--controlnet_transformer_num_layers", str(controlnet_transformer_num_layers)),
	    ]

	    command = ["python", "/home/user/app/inference/cli_demo_camera_i2v_pcd.py"]
	    for flag, value in cli_options:
	        command.extend((flag, value))

	    try:
	        completed = subprocess.run(command, capture_output=True, text=True, check=True)
	    except subprocess.CalledProcessError as e:
	        failure_report = f"❌ Step 2 Inference Failed:\nSTDERR:\n{e.stderr}\nSTDOUT:\n{e.stdout}"
	        return None, failure_report

	    expected_video = f"{out_dir}/00000_{seed}_out.mp4"
	    if os.path.exists(expected_video):
	        return expected_video, completed.stdout
	    return None, completed.stdout

	# -----------------------------
	# UI
	# -----------------------------
	# Two-tab Gradio interface: tab 1 collects the Step 1 (camera anchor)
	# settings, tab 2 the Step 2 (CogVideoX refinement) settings; each tab has a
	# run button, a video player and a log box.
	# NOTE(review): every line in this section sits at the same indent, so the
	# nesting of the `with` layout blocks (which components belong to which
	# Row/Column) is not recoverable from this copy — restore it before running.
	demo = gr.Blocks()

	with demo:
	gr.Markdown("## 🎬 EPiC: Cinematic Camera Control")

	with gr.Tabs():
	# ---- Tab 1: inputs consumed by get_anchor_video ----
	with gr.TabItem("Step 1: Camera Anchor"):
	with gr.Row():
	with gr.Column():
	with gr.Row():
	near_far_estimated = gr.Checkbox(label="Near Far Estimation", value=True)
	pose_input = gr.Textbox(label="Target Pose (θ φ r x y)", placeholder="e.g., 0 30 -0.6 0 0")
	fps_input = gr.Number(value=24, label="FPS")
	# No default value: this textbox is empty until the user fills it in.
	aspect_ratio_inputs=gr.Textbox(label="Target Aspect Ratio (e.g., 2,3)")

	init_dx = gr.Number(value=0.0, label="Start Camera Offset X")
	init_dy = gr.Number(value=0.0, label="Start Camera Offset Y")
	init_dz = gr.Number(value=0.0, label="Start Camera Offset Z")

	num_frames_input = gr.Number(value=49, label="Number of Frames")
	radius_input = gr.Number(value = 1.0, label="Radius Scale")
	mode_input = gr.Dropdown(choices=["gradual"], value="gradual", label="Camera Mode")
	sampler_input = gr.Dropdown(choices=["Euler", "Euler A", "DPM++", "PNDM", "DDIM_Cog", "DDIM_Origin"], value="DDIM_Origin", label="Sampler")
	diff_guidance_input = gr.Number(value=6.0, label="Diffusion Guidance")
	diff_steps_input = gr.Number(value=50, label="Diffusion Steps")
	depth_steps_input = gr.Number(value=5, label="Depth Steps")
	depth_guidance_input = gr.Number(value=1.0, label="Depth Guidance")
	window_input = gr.Number(value=64, label="Window Size")
	overlap_input = gr.Number(value=25, label="Overlap")
	maxres_input = gr.Number(value=1920, label="Max Resolution")
	sample_size = gr.Textbox(label="Sample Size (height, width)", placeholder="e.g., 384, 672", value="384, 672")
	seed_input = gr.Number(value=43, label="Seed")
	height = gr.Number(value=480, label="Height")
	width = gr.Number(value=720, label="Width")
	prompt_input = gr.Textbox(label="Prompt")
	neg_prompt_input = gr.Textbox(label="Negative Prompt", value="The video is not of a high quality, it has a low resolution. Watermark present in each frame. The background is solid. Strange body and strange trajectory.")
	refine_prompt_input = gr.Textbox(label="Refine Prompt", value=" The video is of high quality, and the view is very clear. ")
	with gr.Column():
	video_input = gr.Video(label="Upload Video (MP4)")
	step1_button = gr.Button("▶️ Run Step 1")
	step1_video = gr.Video(label="[Step 1] Masked Video")
	step1_logs = gr.Textbox(label="[Step 1] Logs")

	# ---- Tab 2: inputs consumed by inference ----
	with gr.TabItem("Step 2: CogVideoX Refinement"):
	with gr.Row():
	with gr.Column():
	with gr.Row():

	controlnet_weights_input = gr.Number(value=0.5, label="ControlNet Weights")
	controlnet_guidance_start_input = gr.Number(value=0.0, label="Guidance Start")
	controlnet_guidance_end_input = gr.Number(value=0.5, label="Guidance End")
	guidance_scale_input = gr.Number(value=6.0, label="Guidance Scale")
	inference_steps_input = gr.Number(value=50, label="Num Inference Steps")
	dtype_input = gr.Dropdown(choices=["float16", "bfloat16"], value="bfloat16", label="Compute Dtype")
	seed_input2 = gr.Number(value=42, label="Seed")
	height_input = gr.Number(value=480, label="Height")
	width_input = gr.Number(value=720, label="Width")
	num_frames_input2 = gr.Number(value=49, label="Num Frames")
	fps_input2 = gr.Number(value=24, label="FPS")
	downscale_coef_input = gr.Number(value=8, label="Downscale Coef")
	vae_channels_input = gr.Number(value=16, label="VAE Channels")
	controlnet_input_channels_input = gr.Number(value=6, label="ControlNet Input Channels")
	controlnet_layers_input = gr.Number(value=8, label="ControlNet Transformer Layers")
	with gr.Column():
	step2_video = gr.Video(label="[Step 2] Final Refined Video")
	step2_button = gr.Button("▶️ Run Step 2")
	step2_logs = gr.Textbox(label="[Step 2] Logs")


	# Wire the Step 1 button. The `inputs` list mirrors the parameter order of
	# get_anchor_video; `outputs` lists two components, so the handler is
	# expected to return (video, logs).
	step1_button.click(
	get_anchor_video,
	inputs=[
	video_input, fps_input, num_frames_input, pose_input, mode_input,
	radius_input, near_far_estimated,
	sampler_input, diff_guidance_input, diff_steps_input,
	prompt_input, neg_prompt_input, refine_prompt_input,
	depth_steps_input, depth_guidance_input,
	window_input, overlap_input, maxres_input, sample_size,
	seed_input, height, width, aspect_ratio_inputs,
	init_dx, init_dy, init_dz # ← NEW INPUTS
	],
	outputs=[step1_video, step1_logs]
	)

	# Wire the Step 2 button. The `inputs` list mirrors the parameter order of
	# inference, which returns (video, logs) for the two output components.
	step2_button.click(
	inference,
	inputs=[
	fps_input2, num_frames_input2,
	controlnet_weights_input, controlnet_guidance_start_input,
	controlnet_guidance_end_input, guidance_scale_input,
	inference_steps_input, dtype_input, seed_input2,
	height_input, width_input, downscale_coef_input,
	vae_channels_input, controlnet_input_channels_input,
	controlnet_layers_input
	],
	outputs=[step2_video, step2_logs]
	)

	if __name__ == "__main__":
	    # Script entry point: report the working directory, make sure the
	    # pretrained models are available, then serve the UI on all interfaces.
	    print(f"Current working directory: {os.getcwd()}")
	    download_models()
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    demo.launch(**launch_options)