Muhammad Taqi Raza committed
Commit c6141d6 · Parent(s): 2caa0db

adding camera offsets values

Files changed:
- gradio_app.py (+4 −10)
- inference/cli_demo_camera_i2v_pcd.py (+1 −64)
gradio_app.py CHANGED

@@ -13,8 +13,8 @@ os.environ["HF_HOME"] = HF_HOME
 os.environ["TRANSFORMERS_CACHE"] = HF_HOME
 os.makedirs(HF_HOME, exist_ok=True)
 
-hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
-snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+# hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
+# snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
 
 PRETRAINED_DIR = "/app/pretrained"
 os.makedirs(PRETRAINED_DIR, exist_ok=True)
@@ -113,7 +113,7 @@ def inference(
     fps, num_frames, controlnet_weights, controlnet_guidance_start,
     controlnet_guidance_end, guidance_scale, num_inference_steps, dtype,
     seed, height, width, downscale_coef, vae_channels,
-    controlnet_input_channels, controlnet_transformer_num_layers
+    controlnet_input_channels, controlnet_transformer_num_layers
 ):
     MODEL_PATH = "/app/pretrained/CogVideoX-5b-I2V"
     ckpt_path = "/app/out/EPiC_pretrained/checkpoint-500.pt"
@@ -144,12 +144,6 @@ def inference(
 
     ]
 
-    if upscale:
-        command.extend(["--upscale", "--upscale_factor", str(upscale_factor)])
-
-    if refine:
-        command.append("--refine")
-
     try:
         result = subprocess.run(command, capture_output=True, text=True, check=True)
         logs = result.stdout
@@ -258,7 +252,7 @@ with demo:
         inference_steps_input, dtype_input, seed_input2,
         height_input, width_input, downscale_coef_input,
         vae_channels_input, controlnet_input_channels_input,
-        controlnet_layers_input
+        controlnet_layers_input
     ],
     outputs=[step2_video, step2_logs]
 )
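In gradio_app.py this commit disables the Real-ESRGAN and RIFE weight downloads and stops forwarding the --upscale/--upscale_factor/--refine flags to the inference CLI. A minimal sketch of how the downloads could instead be kept behind an opt-in toggle rather than commented out; ENABLE_POSTPROCESS is a hypothetical name and not part of this repository:

    # Sketch only (not part of the commit): gate the optional post-processing weights
    # behind a hypothetical ENABLE_POSTPROCESS environment variable.
    import os
    from huggingface_hub import hf_hub_download, snapshot_download

    if os.environ.get("ENABLE_POSTPROCESS", "0") == "1":  # hypothetical flag
        # Real-ESRGAN x4 weights used by the (now removed) --upscale path
        hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth",
                        local_dir="model_real_esran")
        # RIFE weights used by the (now removed) --refine path
        snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")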
inference/cli_demo_camera_i2v_pcd.py CHANGED

@@ -37,8 +37,6 @@ import numpy as np
 import torch
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
-upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
-frame_interpolation_model = load_rife_model("model_rife")
 
 def get_black_region_mask_tensor(video_tensor, threshold=2, kernel_size=15):
     """
@@ -174,9 +172,7 @@ def generate_video(
     pool_style: str = 'avg',
     pipe_cpu_offload: bool = False,
     fps: int = 8,
-
-    upscale_factor: int = 4,
-    refine: bool = False,
+
 ):
     """
     Generates a video based on the given prompt and saves it to the specified path.
@@ -369,57 +365,6 @@ def generate_video(
         width=width,  # Width of the generated video
     ).frames
 
-    # ++++++++++++++++++++++++++++++++++++++
-    latents = video_generate_all  # This is a latent
-
-    to_tensor = T.ToTensor()
-    latents = [
-        torch.stack([to_tensor(img) for img in sublist])  # [T, C, H, W]
-        for sublist in latents  # original input
-    ]
-
-    latents = torch.stack(latents)  # [B, T, C, H, W]
-    latents = latents.to(device)
-
-
-    print(f"Type of latents: {type(latents)}")
-    print(f"Length of latents: {len(latents)}")
-
-    # Print detailed info about each item
-    for i, item in enumerate(latents):
-        print(f"\nItem {i}:")
-        print(f"  Type: {type(item)}")
-        if isinstance(item, torch.Tensor):
-            print(f"  Shape: {item.shape}")
-            print(f"  Dtype: {item.dtype}")
-            print(f"  Device: {item.device}")
-        elif isinstance(item, np.ndarray):
-            print(f"  Shape: {item.shape}")
-            print(f"  Dtype: {item.dtype}")
-        elif hasattr(item, 'size') and callable(item.size):  # For PIL images
-            print(f"  Size (WxH): {item.size}")
-            print(f"  Mode: {item.mode}")
-        else:
-            print(f"  Value: {item}")
-
-
-    if upscale:
-        latents = utils.upscale_batch_and_concatenate(upscale_model, latents, device, upscale_factor=upscale_factor)
-    if refine:
-        latents = rife_inference_with_latents(frame_interpolation_model, latents)  # upscale here is assigned 1.
-
-
-    # Convert latents back to PIL images after processing
-    latents = latents.clamp(0, 1)  # Clamp values to [0,1]
-    to_pil = T.ToPILImage()
-    latents = [
-        [to_pil(frame.cpu()) for frame in video]  # video: Tensor[T, C, H, W]
-        for video in latents
-    ]
-    video_generate_all = latents
-
-    # ++++++++++++++++++++++++++++++++++++++
-
     video_generate = video_generate_all[0]
 
     # 6. Export the generated frames to a video file. fps must be 8 for original video.
@@ -491,14 +436,6 @@ if __name__ == "__main__":
     parser.add_argument("--enable_model_cpu_offload", action="store_true", default=False, help="Enable model CPU offload")
     parser.add_argument("--fps", type=int, default=8, help="Frames per second for the output video")
 
-    parser.add_argument("--upscale", action="store_true", default=False, help="Enable upscaling of the output video")
-    parser.add_argument("--upscale_factor", type=int, default=4, help="Factor by which to upscale the output video")
-    parser.add_argument("--refine", action="store_true", default=False, help="Enable refinement of the output video")
-
-    # "--upscale", str(upscale),
-    # "--upscale_factor", str(upscale_factor),
-    # "--refine", str(refine),
-
    args = parser.parse_args()
     dtype = torch.float16 if args.dtype == "float16" else torch.bfloat16
     generate_video(
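In the inference script the commit deletes the whole post-generation upscaling/interpolation block along with its CLI flags. For reference, a condensed sketch of that removed path, assuming the helpers from the previous revision of this file (utils.load_sd_upscale, load_rife_model, utils.upscale_batch_and_concatenate, rife_inference_with_latents) and the upscale/refine/upscale_factor options are available and behave as they were used there:

    import torch
    import torchvision.transforms as T

    # Models assumed from the previous revision of this file.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
    frame_interpolation_model = load_rife_model("model_rife")

    # video_generate_all: list of videos, each a list of PIL frames
    # -> one [B, T, C, H, W] float tensor in [0, 1]
    to_tensor = T.ToTensor()
    frames = torch.stack([
        torch.stack([to_tensor(img) for img in video])
        for video in video_generate_all
    ]).to(device)

    if upscale:  # Real-ESRGAN upscaling of every frame
        frames = utils.upscale_batch_and_concatenate(
            upscale_model, frames, device, upscale_factor=upscale_factor
        )
    if refine:   # RIFE interpolation between consecutive frames
        frames = rife_inference_with_latents(frame_interpolation_model, frames)

    # Back to per-video lists of PIL frames for export
    to_pil = T.ToPILImage()
    video_generate_all = [
        [to_pil(frame.cpu()) for frame in video.clamp(0, 1)]
        for video in frames
    ]

With this block gone, the generated frames are exported directly, which is why the module-level model loads and the --upscale/--upscale_factor/--refine arguments are removed as well.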