Anurag181011 committed on
Commit
a37a20c
·
verified ·
1 Parent(s): 2872348

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -31
app.py CHANGED
@@ -5,44 +5,39 @@ from diffusers import DiffusionPipeline
5
  from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
6
  from functools import lru_cache
7
  from PIL import Image
8
-
9
- from torchvision import transforms
10
- from transformers import CLIPImageProcessor # Updated import
11
 
12
  @lru_cache(maxsize=1)
13
  def load_pipeline():
14
- # Decide on torch_dtype based on device; use fp16 on CUDA to lower memory usage.
15
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
16
  torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
 
17
 
18
- # Load the base model in the selected precision
19
  base_model = "black-forest-labs/FLUX.1-dev"
20
  pipe = DiffusionPipeline.from_pretrained(
21
  base_model,
22
  torch_dtype=torch_dtype,
23
- # low_cpu_mem_usage helps reduce CPU RAM usage during loading
24
- low_cpu_mem_usage=True
25
  )
26
 
27
  # Load LoRA weights
28
  lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
29
  pipe.load_lora_weights(lora_repo)
30
 
31
- # Load safety checker and image processor
 
32
  safety_checker = StableDiffusionSafetyChecker.from_pretrained(
33
  "CompVis/stable-diffusion-safety-checker"
34
  )
35
  image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
36
 
37
- # If using CUDA, apply memory optimizations:
38
  if device.type == "cuda":
39
- # Attention slicing splits up attention computations to save memory.
40
  pipe.enable_attention_slicing()
41
- # Instead of moving the entire model to GPU, offload parts to CPU when not needed.
42
- # This is particularly useful on a 15GB GPU.
43
- pipe.enable_model_cpu_offload()
44
- # Note: xformers memory efficient attention is omitted here because
45
- # model offload works best when not all weights are kept on GPU.
46
 
47
  return pipe, safety_checker, image_processor
48
 
@@ -51,7 +46,7 @@ pipe, safety_checker, image_processor = load_pipeline()
51
  def generate_image(
52
  prompt,
53
  seed=42,
54
- width=512, # default resolution adjusted to 512 for safety
55
  height=512,
56
  guidance_scale=6,
57
  steps=28,
@@ -66,10 +61,6 @@ def generate_image(
66
  if "super realism" not in prompt.lower():
67
  prompt = f"Super Realism, {prompt}"
68
 
69
- # Optional: you could add a progress callback here if your pipeline supports it.
70
- # def update_progress(step, timestep, latents):
71
- # progress((step + 1) / steps, desc="Generating image...")
72
-
73
  with torch.inference_mode():
74
  result = pipe(
75
  prompt=prompt,
@@ -82,23 +73,24 @@ def generate_image(
82
  image = result.images[0]
83
 
84
  progress(1, desc="Safety checking...")
85
- # Preprocess image for safety checking using the updated image processor
86
  safety_input = image_processor(image, return_tensors="pt")
87
  np_image = np.array(image)
88
-
89
- # Unpack safety checker results
90
  _, nsfw_detected = safety_checker(
91
- images=[np_image],
92
  clip_input=safety_input.pixel_values
93
  )
94
 
95
  if nsfw_detected[0]:
96
- return Image.new("RGB", (512, 512)), "NSFW content detected"
97
 
 
 
 
98
  return image, "Generation successful"
99
 
100
  except Exception as e:
101
- return Image.new("RGB", (512, 512)), f"Error: {str(e)}"
102
 
103
  with gr.Blocks() as app:
104
  gr.Markdown("# Flux Super Realism Generator")
@@ -107,7 +99,7 @@ with gr.Blocks() as app:
107
  with gr.Column():
108
  prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
109
  seed_input = gr.Slider(0, 1000, value=42, label="Seed")
110
- # Limit resolution sliders to help avoid GPU memory overuse on a 15GB A100
111
  width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
112
  height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
113
  guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
@@ -124,8 +116,5 @@ with gr.Blocks() as app:
124
  outputs=[output_image, status]
125
  )
126
 
127
- # Rate limiting: 1 request at a time, with a max queue size of 3
128
  app.queue(max_size=3).launch()
129
-
130
- # Advanced multiple GPU support (uncomment if needed):
131
- # pipe.enable_sequential_cpu_offload()
 
5
  from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
6
  from functools import lru_cache
7
  from PIL import Image
8
+ from transformers import CLIPImageProcessor
 
 
9
 
10
  @lru_cache(maxsize=1)
11
  def load_pipeline():
 
12
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+ # Use FP16 when CUDA is available, along with a revision flag if supported.
14
  torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
15
+ revision = "fp16" if device.type == "cuda" else None
16
 
 
17
  base_model = "black-forest-labs/FLUX.1-dev"
18
  pipe = DiffusionPipeline.from_pretrained(
19
  base_model,
20
  torch_dtype=torch_dtype,
21
+ low_cpu_mem_usage=True,
22
+ revision=revision,
23
  )
24
 
25
  # Load LoRA weights
26
  lora_repo = "strangerzonehf/Flux-Super-Realism-LoRA"
27
  pipe.load_lora_weights(lora_repo)
28
 
29
+ # Load safety checker and image processor.
30
+ # If memory remains an issue, you can disable the safety checker below.
31
  safety_checker = StableDiffusionSafetyChecker.from_pretrained(
32
  "CompVis/stable-diffusion-safety-checker"
33
  )
34
  image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
35
 
 
36
  if device.type == "cuda":
37
+ # Use attention slicing for further memory savings.
38
  pipe.enable_attention_slicing()
39
+ # Offload layers to CPU when not in use.
40
+ pipe.enable_sequential_cpu_offload()
 
 
 
41
 
42
  return pipe, safety_checker, image_processor
43
 
 
46
  def generate_image(
47
  prompt,
48
  seed=42,
49
+ width=512, # Keep resolution low by default
50
  height=512,
51
  guidance_scale=6,
52
  steps=28,
 
61
  if "super realism" not in prompt.lower():
62
  prompt = f"Super Realism, {prompt}"
63
 
 
 
 
 
64
  with torch.inference_mode():
65
  result = pipe(
66
  prompt=prompt,
 
73
  image = result.images[0]
74
 
75
  progress(1, desc="Safety checking...")
76
+ # Process image for safety checking
77
  safety_input = image_processor(image, return_tensors="pt")
78
  np_image = np.array(image)
 
 
79
  _, nsfw_detected = safety_checker(
80
+ images=[np_image],
81
  clip_input=safety_input.pixel_values
82
  )
83
 
84
  if nsfw_detected[0]:
85
+ return Image.new("RGB", (width, height)), "NSFW content detected"
86
 
87
+ # Clear CUDA cache
88
+ if device.type == "cuda":
89
+ torch.cuda.empty_cache()
90
  return image, "Generation successful"
91
 
92
  except Exception as e:
93
+ return Image.new("RGB", (width, height)), f"Error: {str(e)}"
94
 
95
  with gr.Blocks() as app:
96
  gr.Markdown("# Flux Super Realism Generator")
 
99
  with gr.Column():
100
  prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
101
  seed_input = gr.Slider(0, 1000, value=42, label="Seed")
102
+ # Limit the resolution sliders to help avoid memory overuse.
103
  width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
104
  height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
105
  guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
 
116
  outputs=[output_image, status]
117
  )
118
 
119
+ # Queue settings to limit concurrent requests
120
  app.queue(max_size=3).launch()