better GPU memory management
main.py CHANGED
@@ -15,35 +15,83 @@ from rewards import get_reward_losses
 from training import LatentNoiseTrainer, get_optimizer
 
 
-
-
+def find_and_move_object_to_cpu():
+    for obj in gc.get_objects():
+        try:
+            # Check if the object is a PyTorch model
+            if isinstance(obj, torch.nn.Module):
+                # Check if any parameter of the model is on CUDA
+                if any(param.is_cuda for param in obj.parameters()):
+                    print(f"Found PyTorch model on CUDA: {type(obj).__name__}")
+                    # Move the model to CPU
+                    obj.to('cpu')
+                    print(f"Moved {type(obj).__name__} to CPU.")
+
+                # Optionally check if buffers are on CUDA
+                if any(buf.is_cuda for buf in obj.buffers()):
+                    print(f"Found buffer on CUDA in {type(obj).__name__}")
+                    obj.to('cpu')
+                    print(f"Moved buffers of {type(obj).__name__} to CPU.")
+
+        except Exception as e:
+            # Handle any exceptions if obj is not a torch model
+            pass
+
 
 def clear_gpu():
     """Clear GPU memory by removing tensors, freeing cache, and moving data to CPU."""
     # List memory usage before clearing
     print(f"Memory allocated before clearing: {torch.cuda.memory_allocated() / (1024 ** 2)} MB")
     print(f"Memory reserved before clearing: {torch.cuda.memory_reserved() / (1024 ** 2)} MB")
-
-    # Force the garbage collector to free unreferenced objects
-    gc.collect()
 
     # Move any bound tensors back to CPU if needed
     if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()  # Ensure that all operations are completed
+        print("GPU memory cleared.")
 
     print(f"Memory allocated after clearing: {torch.cuda.memory_allocated() / (1024 ** 2)} MB")
     print(f"Memory reserved after clearing: {torch.cuda.memory_reserved() / (1024 ** 2)} MB")
 
 def unload_previous_model_if_needed(loaded_model_setup):
+    # Check if any GPU memory is being used even when loaded_model_setup is None
+    if loaded_model_setup is None:
+        if torch.cuda.is_available() and torch.cuda.memory_allocated() > 0:
+            print("Unknown model or tensors are still loaded on the GPU. Clearing GPU memory.")
+            # Call the function to find and move object to CPU
+            find_and_move_object_to_cpu()
+
+        return
+
     """Unload the current model from the GPU and free resources if a new model is being loaded."""
-
-
-
-
+
+    print("Unloading previous model from GPU to free memory.")
+
+    """
+    previous_model = loaded_model_setup[7]  # Assuming pipe is at position [7] in the setup
+    # If the model is 'hyper-sd', ensure its components are moved to CPU before deletion
+    if loaded_model_setup[0].model == "hyper-sd":
+        if previous_model.device == torch.device('cuda'):
+            if hasattr(previous_model, 'unet'):
+                print("Moving UNet back to CPU.")
+                previous_model.unet.to('cpu')  # Move unet to CPU
+
+            print("Moving entire pipeline back to CPU.")
+            previous_model.to('cpu')  # Move the entire pipeline (pipe) to CPU
+    # For other models, use a generic 'to' function if available
+    elif hasattr(previous_model, 'to') and loaded_model_setup[0].model != "flux":
+        if previous_model.device == torch.device('cuda'):
+            print("Moving previous model back to CPU.")
     previous_model.to('cpu')  # Move model to CPU to free GPU memory
-
-
+
+    # Delete the reference to the model to allow garbage collection
+    del previous_model
+    """
+    # Call the function to find and move object to CPU
+    find_and_move_object_to_cpu()
+
+    # Clear GPU memory
+    clear_gpu()  # Ensure that this function properly clears memory (e.g., torch.cuda.empty_cache())
 
 def setup(args, loaded_model_setup=None):
     seed_everything(args.seed)
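Note on the technique above: the gc.get_objects() sweep works because CPython's garbage collector tracks every live torch.nn.Module, so stray pipelines can be found even when no variable still names them. A minimal, self-contained sketch of the same pattern (the Linear module is only an illustrative stand-in for a forgotten pipeline):

    import gc
    import torch

    def modules_left_on_cuda():
        """Yield every live nn.Module that still has a parameter on a CUDA device."""
        for obj in gc.get_objects():
            try:
                if isinstance(obj, torch.nn.Module) and any(p.is_cuda for p in obj.parameters()):
                    yield obj
            except Exception:
                continue  # some gc-tracked objects raise on attribute access

    if torch.cuda.is_available():
        stray = torch.nn.Linear(8, 8).to("cuda")   # stand-in for a forgotten pipeline
        for m in list(modules_left_on_cuda()):
            m.to("cpu")                            # moving parameters in place frees the GPU copies
        torch.cuda.empty_cache()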
@@ -144,7 +192,6 @@ def setup(args, loaded_model_setup=None):
 
     # Final memory cleanup after model loading
     torch.cuda.empty_cache()
-    gc.collect()
 
     trainer = LatentNoiseTrainer(
         reward_losses=reward_losses,
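For reference, the two counters that clear_gpu() prints measure different things: memory_allocated() is the bytes held by live tensors and only falls when tensors are freed, while empty_cache() releases cached-but-unused blocks, which is what memory_reserved() reports. A small sketch showing the distinction:

    import torch

    def report(tag):
        # allocated: bytes held by live tensors; reserved: bytes held by the caching allocator
        print(f"{tag}: allocated={torch.cuda.memory_allocated() / (1024 ** 2):.1f} MB, "
              f"reserved={torch.cuda.memory_reserved() / (1024 ** 2):.1f} MB")

    if torch.cuda.is_available():
        x = torch.randn(1024, 1024, device="cuda")
        report("after alloc")      # both counters are non-zero
        del x                      # frees the tensor -> allocated drops
        torch.cuda.empty_cache()   # releases cached blocks -> reserved drops
        report("after cleanup")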
@@ -188,7 +235,7 @@ def setup(args, loaded_model_setup=None):
 
     # Final memory cleanup
     torch.cuda.empty_cache()  # Free up cached memory
-
+
 
 
 
@@ -200,23 +247,30 @@
 def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pipe, progress_callback=None):
 
     if args.task == "single":
+
+
+
         # Attempt to move the model to GPU if model is not Flux
         if args.model != "flux":
-            if args.model
+            if args.model == "hyper-sd":
                 if pipe.device != torch.device('cuda'):
-
-
+                    # Transfer UNet to GPU
+                    pipe.unet = pipe.unet.to(device, dtype)
+                    # Transfer the whole pipe to GPU, if required (optional)
+                    pipe = pipe.to(device, dtype)
+                    # upcast vae
+                    pipe.vae = pipe.vae.to(dtype=torch.float32)
+            elif args.model == "pixart":
                 if pipe.device != torch.device('cuda'):
                     pipe.to(device)
+            else:
+                if pipe.device != torch.device('cuda'):
+                    pipe.to(device, dtype)
         else:
-            print(f"PIPE:{pipe}")
-
 
         if args.cpu_offloading:
             pipe.enable_sequential_cpu_offload()
 
-        #if pipe.device != torch.device('cuda'):
-        #    pipe.to(device, dtype)
 
         if args.enable_multi_apply:
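The hyper-sd branch above stages components individually and keeps the VAE in float32; half-precision VAE decoding is a known source of overflow and NaN artifacts in Stable Diffusion pipelines, which is what the "upcast vae" step guards against. A toy sketch of the per-component pattern (ToyPipe is an illustrative stand-in, not the real diffusers API):

    import torch

    class ToyPipe(torch.nn.Module):
        # Illustrative stand-in for a diffusers-style pipeline with .unet and .vae
        def __init__(self):
            super().__init__()
            self.unet = torch.nn.Linear(4, 4)
            self.vae = torch.nn.Linear(4, 4)

    pipe = ToyPipe()
    if torch.cuda.is_available():
        pipe.unet.to("cuda", torch.float16)  # the denoiser can run in half precision
        pipe.vae.to("cuda", torch.float32)   # decode in fp32 to avoid overflow artifacts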
@@ -232,8 +286,8 @@ def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pip
         multi_apply_fn = None
 
     torch.cuda.empty_cache()  # Free up cached memory
-
-
+
+    print(f"PIPE:{pipe}")
 
     init_latents = torch.randn(shape, device=device, dtype=dtype)
     latents = torch.nn.Parameter(init_latents, requires_grad=enable_grad)
@@ -246,6 +300,28 @@ def execute_task(args, trainer, device, dtype, shape, enable_grad, settings, pip
         best_image.save(f"{save_dir}/best_image.png")
         #init_image.save(f"{save_dir}/init_image.png")
 
+        # Move the pipe back to CPU
+        if args.model != "flux":
+            if args.model == "hyper-sd":
+                if pipe.device == torch.device('cuda'):
+                    print("Moving the entire pipe back to CPU.")
+                    # Transfer UNet back to CPU
+                    pipe.unet = pipe.unet.to("cpu")
+                    pipe.to('cpu')  # Move all components of the pipe back to CPU
+                    # Delete the pipe to free resources
+                    del pipe
+                    print("Pipe deleted to free resources.")
+
+            else:
+                if pipe.device == torch.device('cuda'):
+                    print("Moving the entire pipe back to CPU.")
+                    pipe.to("cpu")
+                    # Delete the pipe to free resources
+                    del pipe
+                    print("Pipe deleted to free resources.")
+
+        clear_gpu()
+
     elif args.task == "example-prompts":
         fo = open("assets/example_prompts.txt", "r")
         prompts = fo.readlines()