Muhammad Taqi Raza committed
Commit 15db18d · 1 Parent(s): 8e3cdd5

adding estimate near_far

Files changed (2)
  1. gradio_app.py +7 -170
  2. inference/v2v_data/models/infer.py +9 -0
gradio_app.py CHANGED

@@ -1,179 +1,10 @@
-# import os
-# import subprocess
-# from datetime import datetime
-# from pathlib import Path
-# import gradio as gr
-
-# # -----------------------------
-# # Setup paths and env
-# # -----------------------------
-# HF_HOME = "/app/hf_cache"
-# os.environ["HF_HOME"] = HF_HOME
-# os.environ["TRANSFORMERS_CACHE"] = HF_HOME
-# os.makedirs(HF_HOME, exist_ok=True)
-
-# PRETRAINED_DIR = "/app/pretrained"
-# os.makedirs(PRETRAINED_DIR, exist_ok=True)
-
-
-# # -----------------------------
-# # Step 1: Optional Model Download
-# # -----------------------------
-# def download_models():
-#     expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
-#     if not Path(expected_model).exists():
-#         print("⚙️ Downloading pretrained models...")
-#         try:
-#             subprocess.check_call(["bash", "download/download_models.sh"])
-#             print("✅ Models downloaded.")
-#         except subprocess.CalledProcessError as e:
-#             print(f"❌ Model download failed: {e}")
-#     else:
-#         print("✅ Pretrained models already exist.")
-
-
-# download_models()
-
-
-# # -----------------------------
-# # Step 2: Inference Logic
-# # -----------------------------
-
-# def run_epic_inference(video_path, caption, motion_type):
-#     temp_input_path = "/app/temp_input.mp4"
-#     output_dir = f"/app/output_anchor"
-#     video_output_path = f"{output_dir}/masked_videos/output.mp4"
-#     traj_name = motion_type
-#     traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
-#     # Save uploaded video
-#     if video_path:
-#         os.system(f"cp '{video_path}' {temp_input_path}")
-
-#     command = [
-#         "python", "/app/inference/v2v_data/inference.py",
-#         "--video_path", temp_input_path,
-#         "--stride", "1",
-#         "--out_dir", output_dir,
-#         "--radius_scale", "1",
-#         "--camera", "target",
-#         "--mask",
-#         "--target_pose", "0", "30", "-0.6", "0", "0",
-#         "--traj_txt", traj_txt,
-#         "--save_name", "output",
-#         "--mode", "gradual",
-#     ]
-
-#     # Run inference command
-#     try:
-#         result = subprocess.run(command, capture_output=True, text=True, check=True)
-#         print("Getting Anchor Videos run successfully.")
-#         logs = result.stdout
-#     except subprocess.CalledProcessError as e:
-#         logs = f"❌ Inference failed:\n{e.stderr}"
-#         return logs, None
-
-#     # Locate the output video
-#     if video_output_path:
-#         return logs, str(video_output_path)
-#     else:
-#         return f"Inference succeeded but no output video found in {output_dir}", None
-# def print_output_directory(out_dir):
-#     result = ""
-#     for root, dirs, files in os.walk(out_dir):
-#         level = root.replace(out_dir, '').count(os.sep)
-#         indent = ' ' * 4 * level
-#         result += f"{indent}{os.path.basename(root)}/"
-#         sub_indent = ' ' * 4 * (level + 1)
-#         for f in files:
-#             result += f"{sub_indent}{f}\n"
-#     return result
-
-# def inference(video_path, caption, motion_type):
-#     logs, video_masked = run_epic_inference(video_path, caption, motion_type)
-
-#     MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
-
-#     ckpt_steps=500
-#     ckpt_dir="/app/out/EPiC_pretrained"
-#     ckpt_file=f"checkpoint-{ckpt_steps}.pt"
-#     ckpt_path=f"{ckpt_dir}/{ckpt_file}"
-
-#     video_root_dir= f"/app/output_anchor"
-#     out_dir=f"/app/output"
-
-#     command = [
-#         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
-#         "--video_root_dir", video_root_dir,
-#         "--base_model_path", MODEL_PATH,
-#         "--controlnet_model_path", ckpt_path,
-#         "--output_path", out_dir,
-#         "--start_camera_idx", "0",
-#         "--end_camera_idx", "8",
-#         "--controlnet_weights", "1.0",
-#         "--controlnet_guidance_start", "0.0",
-#         "--controlnet_guidance_end", "0.4",
-#         "--controlnet_input_channels", "3",
-#         "--controlnet_transformer_num_attn_heads", "4",
-#         "--controlnet_transformer_attention_head_dim", "64",
-#         "--controlnet_transformer_out_proj_dim_factor", "64",
-#         "--controlnet_transformer_out_proj_dim_zero_init",
-#         "--vae_channels", "16",
-#         "--num_frames", "49",
-#         "--controlnet_transformer_num_layers", "8",
-#         "--infer_with_mask",
-#         "--pool_style", "max",
-#         "--seed", "43"
-#     ]
-
-#     # Run the command
-#     result = subprocess.run(command, capture_output=True, text=True)
-#     if result.returncode == 0:
-#         print("Inference completed successfully.")
-#     else:
-#         print(f"Error occurred during inference: {result.stderr}")
-
-#     # Print output directory contents
-#     logs = result.stdout
-#     result = print_output_directory(out_dir)
-
-#     return logs+result, str(f"{out_dir}/00000_43_out.mp4")
-
-# # output 43
-# # output/ 00000_43_out.mp4
-# #         00000_43_reference.mp4
-# #         00000_43_out_reference.mp4
-
-# # -----------------------------
-# # Step 3: Create Gradio UI
-# # -----------------------------
-# demo = gr.Interface(
-#     fn=inference,
-#     inputs=[
-#         gr.Video(label="Upload Video (MP4)"),
-#         gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
-#         gr.Dropdown(
-#             choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
-#             label="Camera Motion Type",
-#             value="zoom_in",
-#         ),
-#     ],
-#     outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
-#     title="🎬 EPiC: Efficient Video Camera Control",
-#     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
-# )
-
-# # -----------------------------
-# # Step 4: Launch App
-# # -----------------------------
-# if __name__ == "__main__":
-#     demo.launch(server_name="0.0.0.0", server_port=7860)
-
 
 import os
 import subprocess
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
+import numpy as np
 
 # -----------------------------
 # Setup paths and env
@@ -206,6 +37,12 @@ download_models()
 # -----------------------------
 # Step 2: Inference Logic
 # -----------------------------
+def estimate_near_far(depths, lower_percentile=5, upper_percentile=95):
+    flat = depths.flatten()
+    near = np.percentile(flat, lower_percentile)
+    far = np.percentile(flat, upper_percentile)
+    return near, far
+
 def run_epic_inference(video_path, fps, num_frames, target_pose, mode):
     temp_input_path = "/app/temp_input.mp4"
     output_dir = "/app/output_anchor"
inference/v2v_data/models/infer.py CHANGED

@@ -49,6 +49,12 @@ class DepthCrafterDemo:
             print("Xformers is not enabled")
         self.pipe.enable_attention_slicing()
 
+    def estimate_near_far(self, depths, lower_percentile=5, upper_percentile=95):
+        flat = depths.flatten()
+        near = np.percentile(flat, lower_percentile)
+        far = np.percentile(flat, upper_percentile)
+        return near, far
+
     def infer(
         self,
         frames,
@@ -87,6 +93,9 @@ class DepthCrafterDemo:
         depths *= 3900  # compatible with da output
         depths[depths < 1e-5] = 1e-5
         depths = 10000.0 / depths
+
+        near, far = self.estimate_near_far(depths)
+        print(f"Estimated near: {near}, far: {far}")
         depths = depths.clip(near, far)
 
         return depths
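
Taken together, the infer() change scales the pipeline output, inverts it into depths, and then clips to data-driven bounds; the added assignment overrides the near/far values that were previously used for the clip. Below is a standalone sketch of that post-processing path, with a synthetic NumPy array standing in for the pipeline output (in the repository the depths may be a different array type):

import numpy as np

def estimate_near_far(depths, lower_percentile=5, upper_percentile=95):
    flat = depths.flatten()
    near = np.percentile(flat, lower_percentile)
    far = np.percentile(flat, upper_percentile)
    return near, far

# Synthetic stand-in for the relative depth/disparity map from the pipeline.
raw = np.random.default_rng(1).uniform(0.01, 1.0, size=(49, 64, 64))

depths = raw * 3900                    # scale ("compatible with da output")
depths[depths < 1e-5] = 1e-5           # floor tiny values before inverting
depths = 10000.0 / depths              # invert: large raw values become near depths
near, far = estimate_near_far(depths)  # estimate clip bounds from the data itself
depths = depths.clip(near, far)        # drop the extreme 5% tails at both ends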