Anurag181011 committed (verified)
Commit 2872348 · Parent: c34f45c

Update app.py

Files changed (1): app.py (+25 -18)
--- a/app.py
+++ b/app.py
@@ -9,15 +9,19 @@ from PIL import Image
 from torchvision import transforms
 from transformers import CLIPImageProcessor  # Updated import
 
-
-
 @lru_cache(maxsize=1)
 def load_pipeline():
-    # Load base model
+    # Decide on torch_dtype based on device; use fp16 on CUDA to lower memory usage.
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
+
+    # Load the base model in the selected precision
     base_model = "black-forest-labs/FLUX.1-dev"
     pipe = DiffusionPipeline.from_pretrained(
         base_model,
-        torch_dtype=torch.float32
+        torch_dtype=torch_dtype,
+        # low_cpu_mem_usage helps reduce CPU RAM usage during loading
+        low_cpu_mem_usage=True
    )
 
     # Load LoRA weights
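This hunk picks the precision from the available device: half precision roughly halves weight memory on CUDA, while CPU inference stays in float32, where fp16 kernels are poorly supported. As a standalone sketch of the same loading pattern (illustrative only, not part of the commit):

```python
import torch
from diffusers import DiffusionPipeline

# Half precision on CUDA roughly halves the weight memory footprint;
# CPU execution keeps float32, where fp16 ops are poorly supported.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float16 if device.type == "cuda" else torch.float32

pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,  # stream weights in rather than building a second full copy in RAM
)
```

For what it's worth, the FLUX.1-dev model card itself loads the pipeline in torch.bfloat16, which is an equally valid choice on GPUs with bf16 support.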
@@ -30,11 +34,15 @@ def load_pipeline():
     )
     image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-    # Optimizations: enable memory efficient attention if using GPU
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # If using CUDA, apply memory optimizations:
     if device.type == "cuda":
-        pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
+        # Attention slicing splits up attention computations to save memory.
+        pipe.enable_attention_slicing()
+        # Instead of moving the entire model to GPU, offload parts to CPU when not needed.
+        # This is particularly useful on a 15GB GPU.
+        pipe.enable_model_cpu_offload()
+        # Note: xformers memory efficient attention is omitted here because
+        # model offload works best when not all weights are kept on GPU.
 
     return pipe, safety_checker, image_processor
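Note on this hunk: enable_model_cpu_offload() installs accelerate hooks that move each sub-model (text encoders, transformer, VAE) onto the GPU only while it runs, which is why the old pipe = pipe.to(device) is dropped rather than kept. A minimal sketch of the resulting setup, assuming accelerate is installed (diffusers requires it for offload):

```python
pipe.enable_attention_slicing()   # compute attention in smaller chunks: slower, lower peak VRAM
pipe.enable_model_cpu_offload()   # per-submodule GPU residency managed by accelerate hooks

# Do NOT also call pipe.to("cuda") after enabling offload;
# the hooks handle device placement themselves.
```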
 
@@ -43,8 +51,8 @@ pipe, safety_checker, image_processor = load_pipeline()
 def generate_image(
     prompt,
     seed=42,
-    width=1024,
-    height=1024,
+    width=512,   # default resolution adjusted to 512 for safety
+    height=512,
     guidance_scale=6,
     steps=28,
     progress=gr.Progress()
@@ -58,9 +66,9 @@ def generate_image(
     if "super realism" not in prompt.lower():
         prompt = f"Super Realism, {prompt}"
 
-    # Define the callback function with the proper signature
-    def update_progress(step, timestep, latents):
-        progress((step + 1) / steps, desc="Generating image...")
+    # Optional: you could add a progress callback here if your pipeline supports it.
+    # def update_progress(step, timestep, latents):
+    #     progress((step + 1) / steps, desc="Generating image...")
 
     with torch.inference_mode():
         result = pipe(
@@ -70,7 +78,6 @@ def generate_image(
         guidance_scale=guidance_scale,
         num_inference_steps=steps,
         generator=generator,
-
     )
     image = result.images[0]
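As far as this diff shows, the removed update_progress was defined but never passed to pipe(...); the stray blank line deleted from the call above is likely where a callback argument once sat, so commenting the function out loses nothing. If per-step progress is wanted, recent diffusers releases expose callback_on_step_end in place of the deprecated callback argument. A hedged sketch, assuming the installed FLUX pipeline supports it:

```python
# Inside generate_image(), where `progress` and `steps` are in scope.
def on_step_end(pipeline, step, timestep, callback_kwargs):
    progress((step + 1) / steps, desc="Generating image...")
    return callback_kwargs  # the new-style callback must return this dict

result = pipe(
    prompt,
    num_inference_steps=steps,
    callback_on_step_end=on_step_end,
)
```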
 
@@ -100,8 +107,9 @@ with gr.Blocks() as app:
     with gr.Column():
         prompt_input = gr.Textbox(label="Prompt", value="A portrait of a person")
         seed_input = gr.Slider(0, 1000, value=42, label="Seed")
-        width_input = gr.Slider(512, 2048, value=1024, label="Width")
-        height_input = gr.Slider(512, 2048, value=1024, label="Height")
+        # Limit resolution sliders to help avoid GPU memory overuse on a 15GB A100
+        width_input = gr.Slider(256, 1024, value=512, step=64, label="Width")
+        height_input = gr.Slider(256, 1024, value=512, step=64, label="Height")
     guidance_input = gr.Slider(1, 20, value=6, label="Guidance Scale")
     steps_input = gr.Slider(10, 100, value=28, label="Steps")
     submit = gr.Button("Generate")
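Capping the sliders at 1024 (and lowering the defaults to 512, matching the new generate_image signature) means oversized canvases can no longer be requested from the UI at all. For context, the components would be wired to the handler roughly as below; this is hypothetical wiring, since the click handler and the output component fall outside this diff:

```python
submit.click(
    fn=generate_image,
    inputs=[prompt_input, seed_input, width_input, height_input,
            guidance_input, steps_input],
    outputs=output_image,  # assumed gr.Image defined elsewhere in the Blocks
)
```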
@@ -119,6 +127,5 @@ with gr.Blocks() as app:
 # Rate limiting: 1 request at a time, with a max queue size of 3
 app.queue(max_size=3).launch()
 
-# Uncomment for advanced multiple GPU support:
-# pipe.enable_model_cpu_offload()
+# Advanced multiple GPU support (uncomment if needed):
 # pipe.enable_sequential_cpu_offload()
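One caution on the surviving comment: enable_sequential_cpu_offload() is a single-GPU memory saver, not multi-GPU support; it streams weights to the GPU one submodule at a time and is considerably slower than the enable_model_cpu_offload() this commit already enables. Leaving it commented out is the right call while model offload is active, since the two offload modes generally should not be combined on the same pipeline.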
 