Muhammad Taqi Raza committed
Commit 15db18d · 1 Parent(s): 8e3cdd5

adding estimate near_far

Files changed (2)
  1. gradio_app.py +7 -170
  2. inference/v2v_data/models/infer.py +9 -0
gradio_app.py CHANGED

@@ -1,179 +1,10 @@
-# import os
-# import subprocess
-# from datetime import datetime
-# from pathlib import Path
-# import gradio as gr
-
-# # -----------------------------
-# # Setup paths and env
-# # -----------------------------
-# HF_HOME = "/app/hf_cache"
-# os.environ["HF_HOME"] = HF_HOME
-# os.environ["TRANSFORMERS_CACHE"] = HF_HOME
-# os.makedirs(HF_HOME, exist_ok=True)
-
-# PRETRAINED_DIR = "/app/pretrained"
-# os.makedirs(PRETRAINED_DIR, exist_ok=True)
-
-
-# # -----------------------------
-# # Step 1: Optional Model Download
-# # -----------------------------
-# def download_models():
-#     expected_model = os.path.join(PRETRAINED_DIR, "RAFT/raft-things.pth")
-#     if not Path(expected_model).exists():
-#         print("⚙️ Downloading pretrained models...")
-#         try:
-#             subprocess.check_call(["bash", "download/download_models.sh"])
-#             print("✅ Models downloaded.")
-#         except subprocess.CalledProcessError as e:
-#             print(f"❌ Model download failed: {e}")
-#     else:
-#         print("✅ Pretrained models already exist.")
-
-
-# download_models()
-
-
-# # -----------------------------
-# # Step 2: Inference Logic
-# # -----------------------------
-
-# def run_epic_inference(video_path, caption, motion_type):
-#     temp_input_path = "/app/temp_input.mp4"
-#     output_dir = f"/app/output_anchor"
-#     video_output_path = f"{output_dir}/masked_videos/output.mp4"
-#     traj_name = motion_type
-#     traj_txt = f"/app/inference/v2v_data/test/trajs/{traj_name}.txt"
-#     # Save uploaded video
-#     if video_path:
-#         os.system(f"cp '{video_path}' {temp_input_path}")
-
-#     command = [
-#         "python", "/app/inference/v2v_data/inference.py",
-#         "--video_path", temp_input_path,
-#         "--stride", "1",
-#         "--out_dir", output_dir,
-#         "--radius_scale", "1",
-#         "--camera", "target",
-#         "--mask",
-#         "--target_pose", "0", "30", "-0.6", "0", "0",
-#         "--traj_txt", traj_txt,
-#         "--save_name", "output",
-#         "--mode", "gradual",
-#     ]
-
-#     # Run inference command
-#     try:
-#         result = subprocess.run(command, capture_output=True, text=True, check=True)
-#         print("Getting Anchor Videos run successfully.")
-#         logs = result.stdout
-#     except subprocess.CalledProcessError as e:
-#         logs = f"❌ Inference failed:\n{e.stderr}"
-#         return logs, None
-
-#     # Locate the output video
-#     if video_output_path:
-#         return logs, str(video_output_path)
-#     else:
-#         return f"Inference succeeded but no output video found in {output_dir}", None
-# def print_output_directory(out_dir):
-#     result = ""
-#     for root, dirs, files in os.walk(out_dir):
-#         level = root.replace(out_dir, '').count(os.sep)
-#         indent = ' ' * 4 * level
-#         result += f"{indent}{os.path.basename(root)}/"
-#         sub_indent = ' ' * 4 * (level + 1)
-#         for f in files:
-#             result += f"{sub_indent}{f}\n"
-#     return result
-
-# def inference(video_path, caption, motion_type):
-#     logs, video_masked = run_epic_inference(video_path, caption, motion_type)
-
-#     MODEL_PATH="/app/pretrained/CogVideoX-5b-I2V"
-
-#     ckpt_steps=500
-#     ckpt_dir="/app/out/EPiC_pretrained"
-#     ckpt_file=f"checkpoint-{ckpt_steps}.pt"
-#     ckpt_path=f"{ckpt_dir}/{ckpt_file}"
-
-#     video_root_dir= f"/app/output_anchor"
-#     out_dir=f"/app/output"
-
-#     command = [
-#         "python", "/app/inference/cli_demo_camera_i2v_pcd.py",
-#         "--video_root_dir", video_root_dir,
-#         "--base_model_path", MODEL_PATH,
-#         "--controlnet_model_path", ckpt_path,
-#         "--output_path", out_dir,
-#         "--start_camera_idx", "0",
-#         "--end_camera_idx", "8",
-#         "--controlnet_weights", "1.0",
-#         "--controlnet_guidance_start", "0.0",
-#         "--controlnet_guidance_end", "0.4",
-#         "--controlnet_input_channels", "3",
-#         "--controlnet_transformer_num_attn_heads", "4",
-#         "--controlnet_transformer_attention_head_dim", "64",
-#         "--controlnet_transformer_out_proj_dim_factor", "64",
-#         "--controlnet_transformer_out_proj_dim_zero_init",
-#         "--vae_channels", "16",
-#         "--num_frames", "49",
-#         "--controlnet_transformer_num_layers", "8",
-#         "--infer_with_mask",
-#         "--pool_style", "max",
-#         "--seed", "43"
-#     ]
-
-#     # Run the command
-#     result = subprocess.run(command, capture_output=True, text=True)
-#     if result.returncode == 0:
-#         print("Inference completed successfully.")
-#     else:
-#         print(f"Error occurred during inference: {result.stderr}")
-
-#     # Print output directory contents
-#     logs = result.stdout
-#     result = print_output_directory(out_dir)
-
-#     return logs+result, str(f"{out_dir}/00000_43_out.mp4")
-
-# # output 43
-# # output/ 00000_43_out.mp4
-# #         00000_43_reference.mp4
-# #         00000_43_out_reference.mp4
-
-# # -----------------------------
-# # Step 3: Create Gradio UI
-# # -----------------------------
-# demo = gr.Interface(
-#     fn=inference,
-#     inputs=[
-#         gr.Video(label="Upload Video (MP4)"),
-#         gr.Textbox(label="Caption", placeholder="e.g., Amalfi coast with boats"),
-#         gr.Dropdown(
-#             choices=["zoom_in", "rotate", "orbit", "pan", "loop1"],
-#             label="Camera Motion Type",
-#             value="zoom_in",
-#         ),
-#     ],
-#     outputs=[gr.Textbox(label="Inference Logs"), gr.Video(label="Generated Video")],
-#     title="🎬 EPiC: Efficient Video Camera Control",
-#     description="Upload a video, describe the scene, and apply cinematic camera motion using pretrained EPiC models.",
-# )
-
-# # -----------------------------
-# # Step 4: Launch App
-# # -----------------------------
-# if __name__ == "__main__":
-#     demo.launch(server_name="0.0.0.0", server_port=7860)
-
 
 import os
 import subprocess
 from datetime import datetime
 from pathlib import Path
 import gradio as gr
+import numpy as np
 
 # -----------------------------
 # Setup paths and env
@@ -206,6 +37,12 @@ download_models()
 # -----------------------------
 # Step 2: Inference Logic
 # -----------------------------
+def estimate_near_far(depths, lower_percentile=5, upper_percentile=95):
+    flat = depths.flatten()
+    near = np.percentile(flat, lower_percentile)
+    far = np.percentile(flat, upper_percentile)
+    return near, far
+
 def run_epic_inference(video_path, fps, num_frames, target_pose, mode):
     temp_input_path = "/app/temp_input.mp4"
     output_dir = "/app/output_anchor"
inference/v2v_data/models/infer.py CHANGED

@@ -49,6 +49,12 @@ class DepthCrafterDemo:
             print("Xformers is not enabled")
         self.pipe.enable_attention_slicing()
 
+    def estimate_near_far(self, depths, lower_percentile=5, upper_percentile=95):
+        flat = depths.flatten()
+        near = np.percentile(flat, lower_percentile)
+        far = np.percentile(flat, upper_percentile)
+        return near, far
+
     def infer(
         self,
         frames,
@@ -87,6 +93,9 @@ class DepthCrafterDemo:
         depths *= 3900  # compatible with da output
         depths[depths < 1e-5] = 1e-5
         depths = 10000.0 / depths
+
+        near, far = self.estimate_near_far(depths)
+        print(f"Estimated near: {near}, far: {far}")
         depths = depths.clip(near, far)
 
         return depths
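
Taken together, the infer() change scales the pipeline output, inverts it into depths, and then clips to data-driven bounds; the added assignment overrides the near/far values that were previously used for the clip. Below is a standalone sketch of that post-processing path, with a synthetic NumPy array standing in for the pipeline output (in the repository the depths may be a different array type):

import numpy as np

def estimate_near_far(depths, lower_percentile=5, upper_percentile=95):
    flat = depths.flatten()
    near = np.percentile(flat, lower_percentile)
    far = np.percentile(flat, upper_percentile)
    return near, far

# Synthetic stand-in for the relative depth/disparity map from the pipeline.
raw = np.random.default_rng(1).uniform(0.01, 1.0, size=(49, 64, 64))

depths = raw * 3900                    # scale ("compatible with da output")
depths[depths < 1e-5] = 1e-5           # floor tiny values before inverting
depths = 10000.0 / depths              # invert: large raw values become near depths
near, far = estimate_near_far(depths)  # estimate clip bounds from the data itself
depths = depths.clip(near, far)        # drop the extreme 5% tails at both ends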