Muhammad Taqi Raza committed
Commit c6141d6 · 1 Parent(s): 2caa0db

adding camera offsets values

This commit also disables the Real-ESRGAN/RIFE post-processing path: the model downloads in gradio_app.py are commented out, the upscale, upscale_factor, and refine options are dropped from the Gradio inference call and the CLI argument parser, and the post-generation upscale/refine block in inference/cli_demo_camera_i2v_pcd.py is deleted.

gradio_app.py CHANGED
@@ -13,8 +13,8 @@ os.environ["HF_HOME"] = HF_HOME
 os.environ["TRANSFORMERS_CACHE"] = HF_HOME
 os.makedirs(HF_HOME, exist_ok=True)
 
-hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
-snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+# hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
+# snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
 
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
@@ -113,7 +113,7 @@ def inference(
     fps, num_frames, controlnet_weights, controlnet_guidance_start,
     controlnet_guidance_end, guidance_scale, num_inference_steps, dtype,
     seed, height, width, downscale_coef, vae_channels,
-    controlnet_input_channels, controlnet_transformer_num_layers, upscale, upscale_factor, refine
+    controlnet_input_channels, controlnet_transformer_num_layers
 ):
     MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
     ckpt_path = "/app/out/EPiC_pretrained/checkpoint-500.pt"
@@ -144,12 +144,6 @@ def inference(
 
     ]
 
-    if upscale:
-        command.extend(["--upscale", "--upscale_factor", str(upscale_factor)])
-
-    if refine:
-        command.append("--refine")
-
     try:
         result = subprocess.run(command, capture_output=True, text=True, check=True)
         logs = result.stdout
@@ -258,7 +252,7 @@ with demo:
         inference_steps_input, dtype_input, seed_input2,
         height_input, width_input, downscale_coef_input,
         vae_channels_input, controlnet_input_channels_input,
-        controlnet_layers_input, upscale, upscale_factor, refine
+        controlnet_layers_input
     ],
     outputs=[step2_video, step2_logs]
 )
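For context, the Gradio handler drives generation by shelling out to the CLI script. Below is a minimal sketch of that pattern; the wrapper name (run_inference_cli), the entry-point path, and the --prompt flag are assumptions for illustration, while the subprocess.run call and the --upscale/--upscale_factor/--refine flags come from the diff above.

import subprocess

def run_inference_cli(prompt: str, upscale: bool = False,
                      upscale_factor: int = 4, refine: bool = False) -> str:
    # Base command; the real app appends many more model/controlnet flags.
    command = ["python", "inference/cli_demo_camera_i2v_pcd.py", "--prompt", prompt]
    # Optional post-processing flags, as wired up before this commit.
    # --upscale and --refine are store_true flags, so only --upscale_factor
    # carries a value.
    if upscale:
        command.extend(["--upscale", "--upscale_factor", str(upscale_factor)])
    if refine:
        command.append("--refine")
    # check=True raises CalledProcessError on a non-zero exit code, so
    # failures surface in the app logs instead of passing silently.
    result = subprocess.run(command, capture_output=True, text=True, check=True)
    return result.stdout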
inference/cli_demo_camera_i2v_pcd.py CHANGED
@@ -37,8 +37,6 @@ import numpy as np
37
  import torch
38
 
39
  device = "cuda" if torch.cuda.is_available() else "cpu"
40
- upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
41
- frame_interpolation_model = load_rife_model("model_rife")
42
 
43
  def get_black_region_mask_tensor(video_tensor, threshold=2, kernel_size=15):
44
  """
@@ -174,9 +172,7 @@ def generate_video(
174
  pool_style: str = 'avg',
175
  pipe_cpu_offload: bool = False,
176
  fps: int = 8,
177
- upscale: bool = False,
178
- upscale_factor: int = 4,
179
- refine: bool = False,
180
  ):
181
  """
182
  Generates a video based on the given prompt and saves it to the specified path.
@@ -369,57 +365,6 @@ def generate_video(
369
  width=width, # Width of the generated video
370
  ).frames
371
 
372
- # ++++++++++++++++++++++++++++++++++++++
373
- latents = video_generate_all # This is a latent
374
-
375
- to_tensor = T.ToTensor()
376
- latents = [
377
- torch.stack([to_tensor(img) for img in sublist]) # [T, C, H, W]
378
- for sublist in latents # original input
379
- ]
380
-
381
- latents = torch.stack(latents) # [B, T, C, H, W]
382
- latents = latents.to(device)
383
-
384
-
385
- print(f"Type of latents: {type(latents)}")
386
- print(f"Length of latents: {len(latents)}")
387
-
388
- # Print detailed info about each item
389
- for i, item in enumerate(latents):
390
- print(f"\nItem {i}:")
391
- print(f" Type: {type(item)}")
392
- if isinstance(item, torch.Tensor):
393
- print(f" Shape: {item.shape}")
394
- print(f" Dtype: {item.dtype}")
395
- print(f" Device: {item.device}")
396
- elif isinstance(item, np.ndarray):
397
- print(f" Shape: {item.shape}")
398
- print(f" Dtype: {item.dtype}")
399
- elif hasattr(item, 'size') and callable(item.size): # For PIL images
400
- print(f" Size (WxH): {item.size}")
401
- print(f" Mode: {item.mode}")
402
- else:
403
- print(f" Value: {item}")
404
-
405
-
406
- if upscale:
407
- latents = utils.upscale_batch_and_concatenate(upscale_model, latents, device, upscale_factor=upscale_factor)
408
- if refine:
409
- latents = rife_inference_with_latents(frame_interpolation_model, latents) # upscale here is assigned 1.
410
-
411
-
412
- # Convert latents back to PIL images after processing
413
- latents = latents.clamp(0, 1) # Clamp values to [0,1]
414
- to_pil = T.ToPILImage()
415
- latents = [
416
- [to_pil(frame.cpu()) for frame in video] # video: Tensor[T, C, H, W]
417
- for video in latents
418
- ]
419
- video_generate_all = latents
420
-
421
- # ++++++++++++++++++++++++++++++++++++++
422
-
423
  video_generate = video_generate_all[0]
424
 
425
  # 6. Export the generated frames to a video file. fps must be 8 for original video.
@@ -491,14 +436,6 @@ if __name__ == "__main__":
491
  parser.add_argument("--enable_model_cpu_offload", action="store_true", default=False, help="Enable model CPU offload")
492
  parser.add_argument("--fps", type=int, default=8, help="Frames per second for the output video")
493
 
494
- parser.add_argument("--upscale", action="store_true", default=False, help="Enable upscaling of the output video")
495
- parser.add_argument("--upscale_factor", type=int, default=4, help="Factor by which to upscale the output video")
496
- parser.add_argument("--refine", action="store_true", default=False, help="Enable refinement of the output video")
497
-
498
- # "--upscale", str(upscale),
499
- # "--upscale_factor", str(upscale_factor),
500
- # "--refine", str(refine),
501
-
502
  args = parser.parse_args()
503
  dtype = torch.float16 if args.dtype == "float16" else torch.bfloat16
504
  generate_video(
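For reference, the deleted block was a PIL-to-tensor round trip wrapped around the optional upscale and refine calls. Despite the variable name, latents there holds decoded RGB frames rather than VAE latents: T.ToTensor() maps each PIL frame to a [C, H, W] float tensor in [0, 1]. Below is a self-contained sketch of just that conversion scaffolding, under the assumption that the project-specific calls (utils.upscale_batch_and_concatenate, rife_inference_with_latents) would slot in between the two helpers; the helper names are hypothetical.

import torch
import torchvision.transforms as T
from PIL import Image

def frames_to_batch(videos: list[list[Image.Image]], device: str) -> torch.Tensor:
    # Each video becomes a [T, C, H, W] tensor; stacking videos
    # gives a [B, T, C, H, W] batch on the target device.
    to_tensor = T.ToTensor()
    return torch.stack([
        torch.stack([to_tensor(frame) for frame in video])
        for video in videos
    ]).to(device)

def batch_to_frames(batch: torch.Tensor) -> list[list[Image.Image]]:
    # Clamp to [0, 1], the range ToPILImage expects for float tensors,
    # then convert each frame back to a PIL image on the CPU.
    to_pil = T.ToPILImage()
    batch = batch.clamp(0, 1)
    return [[to_pil(frame.cpu()) for frame in video] for video in batch]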