Muhammad Taqi Raza committed
Commit c6141d6 · Parent(s): 2caa0db

adding camera offsets values

Files changed:
- gradio_app.py (+4 −10)
- inference/cli_demo_camera_i2v_pcd.py (+1 −64)
gradio_app.py CHANGED

@@ -13,8 +13,8 @@ os.environ["HF_HOME"] = HF_HOME
 os.environ["TRANSFORMERS_CACHE"] = HF_HOME
 os.makedirs(HF_HOME, exist_ok=True)
 
-hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
-snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+# hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
+# snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
 
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
@@ -113,7 +113,7 @@ def inference(
     fps, num_frames, controlnet_weights, controlnet_guidance_start,
     controlnet_guidance_end, guidance_scale, num_inference_steps, dtype,
     seed, height, width, downscale_coef, vae_channels,
-    controlnet_input_channels, controlnet_transformer_num_layers
+    controlnet_input_channels, controlnet_transformer_num_layers
 ):
     MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
     ckpt_path = "/app/out/EPiC_pretrained/checkpoint-500.pt"
@@ -144,12 +144,6 @@ def inference(
 
     ]
 
-    if upscale:
-        command.extend(["--upscale", "--upscale_factor", str(upscale_factor)])
-
-    if refine:
-        command.append("--refine")
-
     try:
         result = subprocess.run(command, capture_output=True, text=True, check=True)
         logs = result.stdout
@@ -258,7 +252,7 @@ with demo:
         inference_steps_input, dtype_input, seed_input2,
         height_input, width_input, downscale_coef_input,
         vae_channels_input, controlnet_input_channels_input,
-        controlnet_layers_input
+        controlnet_layers_input
     ],
     outputs=[step2_video, step2_logs]
 )
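In gradio_app.py this commit disables the Real-ESRGAN and RIFE weight downloads and stops forwarding the --upscale/--upscale_factor/--refine flags to the inference CLI. A minimal sketch of how the downloads could instead be kept behind an opt-in toggle rather than commented out; ENABLE_POSTPROCESS is a hypothetical name and not part of this repository:

    # Sketch only (not part of the commit): gate the optional post-processing weights
    # behind a hypothetical ENABLE_POSTPROCESS environment variable.
    import os
    from huggingface_hub import hf_hub_download, snapshot_download

    if os.environ.get("ENABLE_POSTPROCESS", "0") == "1":  # hypothetical flag
        # Real-ESRGAN x4 weights used by the (now removed) --upscale path
        hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth",
                        local_dir="model_real_esran")
        # RIFE weights used by the (now removed) --refine path
        snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")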
inference/cli_demo_camera_i2v_pcd.py CHANGED

@@ -37,8 +37,6 @@ import numpy as np
 import torch
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
-frame_interpolation_model = load_rife_model("model_rife")
 
 def get_black_region_mask_tensor(video_tensor, threshold=2, kernel_size=15):
     """
@@ -174,9 +172,7 @@ def generate_video(
     pool_style: str = 'avg',
     pipe_cpu_offload: bool = False,
     fps: int = 8,
-
-    upscale_factor: int = 4,
-    refine: bool = False,
+
 ):
     """
     Generates a video based on the given prompt and saves it to the specified path.
@@ -369,57 +365,6 @@ def generate_video(
         width=width,  # Width of the generated video
     ).frames
 
-    # ++++++++++++++++++++++++++++++++++++++
-    latents = video_generate_all  # This is a latent
-
-    to_tensor = T.ToTensor()
-    latents = [
-        torch.stack([to_tensor(img) for img in sublist])  # [T, C, H, W]
-        for sublist in latents  # original input
-    ]
-
-    latents = torch.stack(latents)  # [B, T, C, H, W]
-    latents = latents.to(device)
-
-
-    print(f"Type of latents: {type(latents)}")
-    print(f"Length of latents: {len(latents)}")
-
-    # Print detailed info about each item
-    for i, item in enumerate(latents):
-        print(f"\nItem {i}:")
-        print(f"  Type: {type(item)}")
-        if isinstance(item, torch.Tensor):
-            print(f"  Shape: {item.shape}")
-            print(f"  Dtype: {item.dtype}")
-            print(f"  Device: {item.device}")
-        elif isinstance(item, np.ndarray):
-            print(f"  Shape: {item.shape}")
-            print(f"  Dtype: {item.dtype}")
-        elif hasattr(item, 'size') and callable(item.size):  # For PIL images
-            print(f"  Size (WxH): {item.size}")
-            print(f"  Mode: {item.mode}")
-        else:
-            print(f"  Value: {item}")
-
-
-    if upscale:
-        latents = utils.upscale_batch_and_concatenate(upscale_model, latents, device, upscale_factor=upscale_factor)
-    if refine:
-        latents = rife_inference_with_latents(frame_interpolation_model, latents)  # upscale here is assigned 1.
-
-
-    # Convert latents back to PIL images after processing
-    latents = latents.clamp(0, 1)  # Clamp values to [0,1]
-    to_pil = T.ToPILImage()
-    latents = [
-        [to_pil(frame.cpu()) for frame in video]  # video: Tensor[T, C, H, W]
-        for video in latents
-    ]
-    video_generate_all = latents
-
-    # ++++++++++++++++++++++++++++++++++++++
-
     video_generate = video_generate_all[0]
 
     # 6. Export the generated frames to a video file. fps must be 8 for original video.
@@ -491,14 +436,6 @@ if __name__ == "__main__":
     parser.add_argument("--enable_model_cpu_offload", action="store_true", default=False, help="Enable model CPU offload")
     parser.add_argument("--fps", type=int, default=8, help="Frames per second for the output video")
 
-    parser.add_argument("--upscale", action="store_true", default=False, help="Enable upscaling of the output video")
-    parser.add_argument("--upscale_factor", type=int, default=4, help="Factor by which to upscale the output video")
-    parser.add_argument("--refine", action="store_true", default=False, help="Enable refinement of the output video")
-
-    # "--upscale", str(upscale),
-    # "--upscale_factor", str(upscale_factor),
-    # "--refine", str(refine),
-
    args = parser.parse_args()
     dtype = torch.float16 if args.dtype == "float16" else torch.bfloat16
     generate_video(
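In the inference script the commit deletes the whole post-generation upscaling/interpolation block along with its CLI flags. For reference, a condensed sketch of that removed path, assuming the helpers from the previous revision of this file (utils.load_sd_upscale, load_rife_model, utils.upscale_batch_and_concatenate, rife_inference_with_latents) and the upscale/refine/upscale_factor options are available and behave as they were used there:

    import torch
    import torchvision.transforms as T

    # Models assumed from the previous revision of this file.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
    frame_interpolation_model = load_rife_model("model_rife")

    # video_generate_all: list of videos, each a list of PIL frames
    # -> one [B, T, C, H, W] float tensor in [0, 1]
    to_tensor = T.ToTensor()
    frames = torch.stack([
        torch.stack([to_tensor(img) for img in video])
        for video in video_generate_all
    ]).to(device)

    if upscale:  # Real-ESRGAN upscaling of every frame
        frames = utils.upscale_batch_and_concatenate(
            upscale_model, frames, device, upscale_factor=upscale_factor
        )
    if refine:   # RIFE interpolation between consecutive frames
        frames = rife_inference_with_latents(frame_interpolation_model, frames)

    # Back to per-video lists of PIL frames for export
    to_pil = T.ToPILImage()
    video_generate_all = [
        [to_pil(frame.cpu()) for frame in video.clamp(0, 1)]
        for video in frames
    ]

With this block gone, the generated frames are exported directly, which is why the module-level model loads and the --upscale/--upscale_factor/--refine arguments are removed as well.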