Nick088 commited on
Commit
5f92874
·
verified ·
1 Parent(s): 1653571

Switching back to cpu offload instead of sequential one, empty cache

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -25,25 +25,25 @@ MAX_SEED = np.iinfo(np.int32).max
25
  sd3_medium_pipe = StableDiffusion3Pipeline.from_pretrained(
26
  "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
27
  )
28
- sd3_medium_pipe.enable_sequential_cpu_offload()
29
 
30
  # sd 2.1
31
  sd2_1_pipe = StableDiffusionPipeline.from_pretrained(
32
  "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
33
  )
34
- sd2_1_pipe.enable_sequential_cpu_offload()
35
 
36
  # sdxl
37
  sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
38
  "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
39
  )
40
- sdxl_pipe.enable_sequential_cpu_offload()
41
 
42
  # sdxl flash
43
  sdxl_flash_pipe = StableDiffusionXLPipeline.from_pretrained(
44
  "sd-community/sdxl-flash", torch_dtype=torch.float16
45
  )
46
- sdxl_flash_pipe.enable_sequential_cpu_offload()
47
  # Ensure sampler uses "trailing" timesteps for sdxl flash.
48
  sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(
49
  sdxl_flash_pipe.scheduler.config, timestep_spacing="trailing"
@@ -53,18 +53,20 @@ sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(
53
  stable_cascade_prior_pipe = StableCascadePriorPipeline.from_pretrained(
54
  "stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
55
  )
56
- stable_cascade_prior_pipe.enable_sequential_cpu_offload()
57
  stable_cascade_decoder_pipe = StableCascadeDecoderPipeline.from_pretrained(
58
  "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.float16
59
  )
60
- stable_cascade_decoder_pipe.enable_sequential_cpu_offload()
61
 
62
  # sd 1.5
63
  sd1_5_pipe = StableDiffusionPipeline.from_pretrained(
64
  "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
65
  )
66
- sd1_5_pipe.enable_sequential_cpu_offload()
67
 
 
 
68
 
69
  # Helper function to generate images for a single model
70
  @spaces.GPU(duration=80)
@@ -134,6 +136,9 @@ def generate_single_image(
134
  num_images_per_prompt=num_images_per_prompt,
135
  ).images
136
 
 
 
 
137
  return output
138
 
139
 
 
25
  sd3_medium_pipe = StableDiffusion3Pipeline.from_pretrained(
26
  "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
27
  )
28
+ sd3_medium_pipe.enable_model_cpu_offload()
29
 
30
  # sd 2.1
31
  sd2_1_pipe = StableDiffusionPipeline.from_pretrained(
32
  "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
33
  )
34
+ sd2_1_pipe.enable_model_cpu_offload()
35
 
36
  # sdxl
37
  sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
38
  "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
39
  )
40
+ sdxl_pipe.enable_model_cpu_offload()
41
 
42
  # sdxl flash
43
  sdxl_flash_pipe = StableDiffusionXLPipeline.from_pretrained(
44
  "sd-community/sdxl-flash", torch_dtype=torch.float16
45
  )
46
+ sdxl_flash_pipe.enable_model_cpu_offload()
47
  # Ensure sampler uses "trailing" timesteps for sdxl flash.
48
  sdxl_flash_pipe.scheduler = DPMSolverSinglestepScheduler.from_config(
49
  sdxl_flash_pipe.scheduler.config, timestep_spacing="trailing"
 
53
  stable_cascade_prior_pipe = StableCascadePriorPipeline.from_pretrained(
54
  "stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
55
  )
56
+ stable_cascade_prior_pipe.enable_model_cpu_offload()
57
  stable_cascade_decoder_pipe = StableCascadeDecoderPipeline.from_pretrained(
58
  "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.float16
59
  )
60
+ stable_cascade_decoder_pipe.enable_model_cpu_offload()
61
 
62
  # sd 1.5
63
  sd1_5_pipe = StableDiffusionPipeline.from_pretrained(
64
  "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
65
  )
66
+ sd1_5_pipe.enable_model_cpu_offload()
67
 
68
+ # empty cache to free up gpu memory before inference
69
+ torch.cuda.empty_cache()
70
 
71
  # Helper function to generate images for a single model
72
  @spaces.GPU(duration=80)
 
136
  num_images_per_prompt=num_images_per_prompt,
137
  ).images
138
 
139
+ # empty cache to free up gpu memory
140
+ torch.cuda.empty_cache()
141
+
142
  return output
143
 
144