Spaces:

Anurag181011
/

Kidbea_Image_Generation

Running on Zero

App Files Community

Anurag181011 committed on Feb 17

Commit

a37a20c

verified ·

1 Parent(s): 2872348

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -31

app.py CHANGED Viewed

@@ -5,44 +5,39 @@ from diffusers import DiffusionPipeline
 from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from functools import lru_cache
 from PIL import Image
-from torchvision import transforms
-from transformers import CLIPImageProcessor  # Updated import
 @lru_cache(maxsize=1)
 def load_pipeline():
-    # Decide on torch_dtype based on device; use fp16 on CUDA to lower memory usage.
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
-    # Load the base model in the selected precision
     base_model = "black-forest-labs/FLUX.1-dev"
     pipe = DiffusionPipeline.from_pretrained(
         base_model,
         torch_dtype=torch_dtype,
-        # low_cpu_mem_usage helps reduce CPU RAM usage during loading
-        low_cpu_mem_usage=True
     )
     # Load LoRA weights
     lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
     pipe.load_lora_weights(lora_repo)
-    # Load safety checker and image processor
     safety_checker = StableDiffusionSafetyChecker.from_pretrained(
         "CompVis/stable-diffusion-safety-checker"
     )
     image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
-    # If using CUDA, apply memory optimizations:
     if device.type == "cuda":
-        # Attention slicing splits up attention computations to save memory.
         pipe.enable_attention_slicing()
-        # Instead of moving the entire model to GPU, offload parts to CPU when not needed.
-        # This is particularly useful on a 15GB GPU.
-        pipe.enable_model_cpu_offload()
-        # Note: xformers memory efficient attention is omitted here because
-        # model offload works best when not all weights are kept on GPU.
     return pipe, safety_checker, image_processor
@@ -51,7 +46,7 @@ pipe, safety_checker, image_processor = load_pipeline()
 def generate_image(
     prompt,
     seed=42,
-    width=512,   # default resolution adjusted to 512 for safety
     height=512,
     guidance_scale=6,
     steps=28,
@@ -66,10 +61,6 @@ def generate_image(
         if "super realism" not in prompt.lower():
             prompt = f"Super Realism, {prompt}"
-        # Optional: you could add a progress callback here if your pipeline supports it.
-        # def update_progress(step, timestep, latents):
-        #     progress((step + 1) / steps, desc="Generating image...")
         with torch.inference_mode():
             result = pipe(
                 prompt=prompt,
@@ -82,23 +73,24 @@ def generate_image(
             image = result.images[0]
         progress(1, desc="Safety checking...")
-        # Preprocess image for safety checking using the updated image processor
         safety_input = image_processor(image, return_tensors="pt")
         np_image = np.array(image)
-        # Unpack safety checker results
         _, nsfw_detected = safety_checker(
-            images=[np_image],
             clip_input=safety_input.pixel_values
         )
         if nsfw_detected[0]:
-            return Image.new("RGB", (512, 512)), "NSFW content detected"
         return image, "Generation successful"
     except Exception as e:
-        return Image.new("RGB", (512, 512)), f"Error: {str(e)}"
 with gr.Blocks() as app:
     gr.Markdown("# Flux Super Realism Generator")
@@ -107,7 +99,7 @@ with gr.Blocks() as app:
         with gr.Column():
             prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
             seed_input = gr.Slider(0, 1000, value=42, label="Seed")
-            # Limit resolution sliders to help avoid GPU memory overuse on a 15GB A100
             width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
             height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
             guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
@@ -124,8 +116,5 @@ with gr.Blocks() as app:
         outputs=[output_image, status]
     )
-    # Rate limiting: 1 request at a time, with a max queue size of 3
     app.queue(max_size=3).launch()
-# Advanced multiple GPU support (uncomment if needed):
-# pipe.enable_sequential_cpu_offload()

 from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from functools import lru_cache
 from PIL import Image
+from transformers import CLIPImageProcessor
 @lru_cache(maxsize=1)
 def load_pipeline():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Use FP16 when CUDA is available, along with a revision flag if supported.
     torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
+    revision = "fp16" if device.type == "cuda" else None
     base_model = "black-forest-labs/FLUX.1-dev"
     pipe = DiffusionPipeline.from_pretrained(
         base_model,
         torch_dtype=torch_dtype,
+        low_cpu_mem_usage=True,
+        revision=revision,
     )
     # Load LoRA weights
     lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
     pipe.load_lora_weights(lora_repo)
+    # Load safety checker and image processor.
+    # If memory remains an issue, you can disable the safety checker below.
     safety_checker = StableDiffusionSafetyChecker.from_pretrained(
         "CompVis/stable-diffusion-safety-checker"
     )
     image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
     if device.type == "cuda":
+        # Use attention slicing for further memory savings.
         pipe.enable_attention_slicing()
+        # Offload layers to CPU when not in use.
+        pipe.enable_sequential_cpu_offload()
     return pipe, safety_checker, image_processor
 def generate_image(
     prompt,
     seed=42,
+    width=512,   # Keep resolution low by default
     height=512,
     guidance_scale=6,
     steps=28,
         if "super realism" not in prompt.lower():
             prompt = f"Super Realism, {prompt}"
         with torch.inference_mode():
             result = pipe(
                 prompt=prompt,
             image = result.images[0]
         progress(1, desc="Safety checking...")
+        # Process image for safety checking
         safety_input = image_processor(image, return_tensors="pt")
         np_image = np.array(image)
         _, nsfw_detected = safety_checker(
+            images=[np_image],
             clip_input=safety_input.pixel_values
         )
         if nsfw_detected[0]:
+            return Image.new("RGB", (width, height)), "NSFW content detected"
+        # Clear CUDA cache
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
         return image, "Generation successful"
     except Exception as e:
+        return Image.new("RGB", (width, height)), f"Error: {str(e)}"
 with gr.Blocks() as app:
     gr.Markdown("# Flux Super Realism Generator")
         with gr.Column():
             prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
             seed_input = gr.Slider(0, 1000, value=42, label="Seed")
+            # Limit the resolution sliders to help avoid memory overuse.
             width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
             height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
             guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
         outputs=[output_image, status]
     )
+    # Queue settings to limit concurrent requests
     app.queue(max_size=3).launch()