Spaces:

Kidbea
/

Kidbea_Image_Generation

Runtime error

App Files Files Community

Anurag Bhardwaj committed on Feb 17

Commit

2872348

verified ·

1 Parent(s): c34f45c

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -18

app.py CHANGED Viewed

@@ -9,15 +9,19 @@ from PIL import Image
 from torchvision import transforms
 from transformers import CLIPImageProcessor  # Updated import
 @lru_cache(maxsize=1)
 def load_pipeline():
-    # Load base model
     base_model = "black-forest-labs/FLUX.1-dev"
     pipe = DiffusionPipeline.from_pretrained(
         base_model,
-        torch_dtype=torch.float32
     )
     # Load LoRA weights
@@ -30,11 +34,15 @@ def load_pipeline():
     )
     image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
-    # Optimizations: enable memory efficient attention if using GPU
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     if device.type == "cuda":
-        pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
     return pipe, safety_checker, image_processor
@@ -43,8 +51,8 @@ pipe, safety_checker, image_processor = load_pipeline()
 def generate_image(
     prompt,
     seed=42,
-    width=1024,
-    height=1024,
     guidance_scale=6,
     steps=28,
     progress=gr.Progress()
@@ -58,9 +66,9 @@ def generate_image(
         if "super realism" not in prompt.lower():
             prompt = f"Super Realism, {prompt}"
-        # Define the callback function with the proper signature
-        def update_progress(step, timestep, latents):
-            progress((step + 1) / steps, desc="Generating image...")
         with torch.inference_mode():
             result = pipe(
@@ -70,7 +78,6 @@ def generate_image(
                 guidance_scale=guidance_scale,
                 num_inference_steps=steps,
                 generator=generator,
             )
             image = result.images[0]
@@ -100,8 +107,9 @@ with gr.Blocks() as app:
         with gr.Column():
             prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
             seed_input = gr.Slider(0, 1000, value=42, label="Seed")
-            width_input = gr.Slider(512, 2048, value=1024, label="Width")
-            height_input = gr.Slider(512, 2048, value=1024, label="Height")
             guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
             steps_input = gr.Slider(10, 100, value=28, label="Steps")
             submit = gr.Button("Generate")
@@ -119,6 +127,5 @@ with gr.Blocks() as app:
     # Rate limiting: 1 request at a time, with a max queue size of 3
     app.queue(max_size=3).launch()
-# Uncomment for advanced multiple GPU support:
-# pipe.enable_model_cpu_offload()
 # pipe.enable_sequential_cpu_offload()

 from torchvision import transforms
 from transformers import CLIPImageProcessor  # Updated import
 @lru_cache(maxsize=1)
 def load_pipeline():
+    # Decide on torch_dtype based on device; use fp16 on CUDA to lower memory usage.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
+    # Load the base model in the selected precision
     base_model = "black-forest-labs/FLUX.1-dev"
     pipe = DiffusionPipeline.from_pretrained(
         base_model,
+        torch_dtype=torch_dtype,
+        # low_cpu_mem_usage helps reduce CPU RAM usage during loading
+        low_cpu_mem_usage=True
     )
     # Load LoRA weights
     )
     image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
+    # If using CUDA, apply memory optimizations:
     if device.type == "cuda":
+        # Attention slicing splits up attention computations to save memory.
+        pipe.enable_attention_slicing()
+        # Instead of moving the entire model to GPU, offload parts to CPU when not needed.
+        # This is particularly useful on a 15GB GPU.
+        pipe.enable_model_cpu_offload()
+        # Note: xformers memory efficient attention is omitted here because
+        # model offload works best when not all weights are kept on GPU.
     return pipe, safety_checker, image_processor
 def generate_image(
     prompt,
     seed=42,
+    width=512,   # default resolution adjusted to 512 for safety
+    height=512,
     guidance_scale=6,
     steps=28,
     progress=gr.Progress()
         if "super realism" not in prompt.lower():
             prompt = f"Super Realism, {prompt}"
+        # Optional: you could add a progress callback here if your pipeline supports it.
+        # def update_progress(step, timestep, latents):
+        #     progress((step + 1) / steps, desc="Generating image...")
         with torch.inference_mode():
             result = pipe(
                 guidance_scale=guidance_scale,
                 num_inference_steps=steps,
                 generator=generator,
             )
             image = result.images[0]
         with gr.Column():
             prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
             seed_input = gr.Slider(0, 1000, value=42, label="Seed")
+            # Limit resolution sliders to help avoid GPU memory overuse on a 15GB GPU
+            width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
+            height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
             guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
             steps_input = gr.Slider(10, 100, value=28, label="Steps")
             submit = gr.Button("Generate")
     # Rate limiting: 1 request at a time, with a max queue size of 3
     app.queue(max_size=3).launch()
+# Optional: more aggressive CPU offloading for lower GPU memory use, at the cost of speed (uncomment if needed):
 # pipe.enable_sequential_cpu_offload()