Himanshu806 committed
Commit 69d628b · verified · 1 Parent(s): 1aa779a

Update app.py

Files changed (1)
1. app.py +292 -64
app.py CHANGED
@@ -9,7 +9,8 @@ import torch
 from torchvision import transforms
 import zipfile
 
-from diffusers import FluxFillPipeline, AutoencoderKL
+from diffusers import FluxFillPipeline, AutoencoderKL, DDIMScheduler, DDPMScheduler
+from diffusers.models.attention_processor import AttnProcessor2_0
 from PIL import Image
 
 MAX_SEED = np.iinfo(np.int32).max
@@ -18,6 +19,9 @@ MAX_IMAGE_SIZE = 2048
 # Initialize the pipeline
 pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16).to("cuda")
 
+# Enable memory efficient attention
+pipe.enable_xformers_memory_efficient_attention()
+
 # Load LoRA models from JSON
 with open("lora_models.json", "r") as f:
     lora_models = json.load(f)
@@ -81,11 +85,98 @@ def calculate_optimal_dimensions(image: Image.Image, scale_factor=1.0):
 
     return width, height
 
+def preprocess_mask(mask, blur_radius=10):
+    """Apply blurring to create a soft mask for smoother transitions"""
+    from PIL import ImageFilter
+    if mask:
+        # Apply Gaussian blur to soften the mask edges
+        blurred_mask = mask.filter(ImageFilter.GaussianBlur(radius=blur_radius))
+        return blurred_mask
+    return mask
+
+def enhance_detail(img, sharpness_factor=1.5):
+    """Enhance the details in the generated image"""
+    from PIL import ImageEnhance
+    if img:
+        enhancer = ImageEnhance.Sharpness(img)
+        enhanced_img = enhancer.enhance(sharpness_factor)
+        return enhanced_img
+    return img
+
+def layer_based_inference(pipe, image, mask, prompt,
+                          structure_guidance_scale, texture_guidance_scale,
+                          structure_steps, texture_steps, strength, seed,
+                          lora_scale, width, height):
+    """Perform a two-stage layer-based diffusion process for better quality"""
+    # Create generators with the same seed for reproducibility
+    structure_generator = torch.Generator(device='cuda').manual_seed(seed)
+    texture_generator = torch.Generator(device='cuda').manual_seed(seed+1)
+
+    # Configure structure pipeline (focus on shapes and composition)
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+
+    # Stage 1: Generate the overall structure with more steps but lower guidance
+    structure_kwargs = {
+        "prompt": f"structure of {prompt}",
+        "image": image,
+        "mask_image": mask,
+        "height": height,
+        "width": width,
+        "guidance_scale": structure_guidance_scale,
+        "strength": strength * 0.9,  # Less strength to preserve original structure
+        "num_inference_steps": structure_steps,
+        "generator": structure_generator,
+    }
+
+    # Add LoRA scale if supported
+    if pipe.is_lora_enabled():
+        structure_kwargs["cross_attention_kwargs"] = {"scale": lora_scale * 0.8}  # Reduce LoRA impact for structure
+
+    # Generate the structural base
+    try:
+        intermediate_result = pipe(**structure_kwargs).images[0]
+    except Exception as e:
+        print(f"Structure generation error: {str(e)}")
+        return None
+
+    # Configure texture pipeline (focus on details and textures)
+    pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)
+
+    # Stage 2: Refine with texture details using the intermediate result
+    texture_kwargs = {
+        "prompt": f"detailed texture of {prompt}",
+        "image": intermediate_result,
+        "mask_image": mask,
+        "height": height,
+        "width": width,
+        "guidance_scale": texture_guidance_scale,
+        "strength": strength * 0.6,  # Lower strength to maintain structure
+        "num_inference_steps": texture_steps,
+        "generator": texture_generator,
+    }
+
+    # Add LoRA scale if supported
+    if pipe.is_lora_enabled():
+        texture_kwargs["cross_attention_kwargs"] = {"scale": lora_scale * 1.2}  # Increase LoRA impact for texture
+
+    # Generate the final result with detailed textures
+    try:
+        final_result = pipe(**texture_kwargs).images[0]
+        return final_result
+    except Exception as e:
+        print(f"Texture generation error: {str(e)}")
+        return intermediate_result  # Return intermediate result if texture stage fails
+
 @spaces.GPU(durations=300)
 def infer(edit_images, prompt, lora_model, strength, seed=42, randomize_seed=False,
           guidance_scale=3.5, num_inference_steps=28, lora_scale=0.75,
-          scale_factor=1.0, progress=gr.Progress(track_tqdm=True)):
-    gr.Info("Infering")
+          scale_factor=1.0,
+          use_layered_diffusion=True, blur_mask=8, detail_level=1.2,
+          structure_guidance=2.5, texture_guidance=5.0,
+          structure_steps=20, texture_steps=15,
+          progress=gr.Progress(track_tqdm=True)):
+
+    gr.Info("Starting inference process")
 
     # Load and enable LoRA if selected
     if lora_model != "None":
@@ -94,7 +185,7 @@ def infer(edit_images, prompt, lora_model, strength, seed=42, randomize_seed=Fal
     else:
         pipe.disable_lora()
 
-    gr.Info("starting checks")
+    gr.Info("Processing input images")
 
     image = edit_images["background"]
     mask = edit_images["layers"][0]
@@ -108,40 +199,73 @@ def infer(edit_images, prompt, lora_model, strength, seed=42, randomize_seed=Fal
 
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
-
+
+    # Process the mask for smoother transitions
+    processed_mask = preprocess_mask(mask, blur_radius=blur_mask)
+
     # Generate image
     gr.Info(f"Generating image at {width}x{height}")
-    generator = torch.Generator(device='cuda').manual_seed(seed)
 
-    # Configure pipeline parameters
-    pipeline_kwargs = {
-        "prompt": prompt,
-        "prompt_2": prompt,
-        "image": image,
-        "mask_image": mask,
-        "height": height,
-        "width": width,
-        "guidance_scale": guidance_scale,
-        "strength": strength,
-        "num_inference_steps": num_inference_steps,
-        "generator": generator,
-    }
+    if use_layered_diffusion:
+        gr.Info("Using layered diffusion for higher quality output")
+        result_image = layer_based_inference(
+            pipe=pipe,
+            image=image,
+            mask=processed_mask,
+            prompt=prompt,
+            structure_guidance_scale=structure_guidance,
+            texture_guidance_scale=texture_guidance,
+            structure_steps=structure_steps,
+            texture_steps=texture_steps,
+            strength=strength,
+            seed=seed,
+            lora_scale=lora_scale,
+            width=width,
+            height=height
+        )
+
+        if result_image is None:
+            gr.Error("Layered diffusion failed. Falling back to standard diffusion.")
+            use_layered_diffusion = False
 
-    # Add LoRA scale if model supports it
-    if lora_model != "None":
+    if not use_layered_diffusion:
+        # Standard diffusion as fallback
+        generator = torch.Generator(device='cuda').manual_seed(seed)
+
+        # Configure pipeline parameters
+        pipeline_kwargs = {
+            "prompt": prompt,
+            "prompt_2": prompt,
+            "image": image,
+            "mask_image": processed_mask,
+            "height": height,
+            "width": width,
+            "guidance_scale": guidance_scale,
+            "strength": strength,
+            "num_inference_steps": num_inference_steps,
+            "generator": generator,
+        }
+
+        # Add LoRA scale if model supports it
+        if lora_model != "None":
+            try:
+                pipeline_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}
+            except:
+                gr.Info("LoRA scale not supported - using default scaling")
+
+        # Run the pipeline
         try:
-            pipeline_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}
-        except:
-            gr.Info("LoRA scale not supported in this model version - using default scaling")
+            output = pipe(**pipeline_kwargs)
+            result_image = output.images[0]
+        except Exception as e:
+            gr.Error(f"Error during generation: {str(e)}")
+            return None, seed
+
+    # Enhance details based on user preference
+    if detail_level > 1.0:
+        gr.Info("Enhancing image details")
+        result_image = enhance_detail(result_image, sharpness_factor=detail_level)
 
-    # Run the pipeline
-    try:
-        output = pipe(**pipeline_kwargs)
-        result_image = output.images[0]
-    except Exception as e:
-        gr.Error(f"Error during generation: {str(e)}")
-        return None, seed
-
     output_image_jpg = result_image.convert("RGB")
     output_image_jpg.save("output.jpg", "JPEG")
 
@@ -154,7 +278,9 @@ def download_image(image):
     return "output.png"
 
 def save_details(result, edit_image, prompt, lora_model, strength, seed, guidance_scale,
-                 num_inference_steps, lora_scale, scale_factor):
+                 num_inference_steps, lora_scale, scale_factor,
+                 use_layered_diffusion, blur_mask, detail_level,
+                 structure_guidance, texture_guidance, structure_steps, texture_steps):
     image = edit_image["background"]
     mask = edit_image["layers"][0]
 
@@ -171,7 +297,6 @@ def save_details(result, edit_image, prompt, lora_model, strength, seed, guidanc
 
     details = {
         "prompt": prompt,
-        "negative_prompt": negative_prompt,
         "lora_model": lora_model,
         "lora_scale": lora_scale,
         "strength": strength,
@@ -180,7 +305,14 @@ def save_details(result, edit_image, prompt, lora_model, strength, seed, guidanc
         "num_inference_steps": num_inference_steps,
         "scale_factor": scale_factor,
         "width": result.width,
-        "height": result.height
+        "height": result.height,
+        "use_layered_diffusion": use_layered_diffusion,
+        "blur_mask": blur_mask,
+        "detail_level": detail_level,
+        "structure_guidance": structure_guidance,
+        "texture_guidance": texture_guidance,
+        "structure_steps": structure_steps,
+        "texture_steps": texture_steps
     }
 
     with open("details.json", "w") as f:
@@ -198,8 +330,11 @@ def save_details(result, edit_image, prompt, lora_model, strength, seed, guidanc
 def set_image_as_inpaint(image):
     return image
 
+def toggle_layered_diffusion(use_layers):
+    return gr.update(visible=use_layers)
+
 examples = [
-    "photography of a young woman, accent lighting, (front view:1.4)",
+    "photography of a young woman, accent lighting, (front view:1.4), detailed skin texture, 8k",
 ]
 
 css="""
@@ -207,12 +342,21 @@ css="""
     margin: 0 auto;
     max-width: 1000px;
 }
+.layer-settings {
+    border: 1px solid #ccc;
+    padding: 10px;
+    border-radius: 8px;
+    background-color: #f9f9f9;
+    margin-top: 10px;
+}
 """
 
 with gr.Blocks(css=css) as demo:
 
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""# FLUX.1 [dev] Inpainting Tool""")
+        gr.Markdown(f"""# FLUX.1 [dev] Advanced Inpainting Tool
+        *Now with layered diffusion for improved quality and control*
+        """)
         with gr.Row():
             with gr.Column():
                 edit_image = gr.ImageEditor(
@@ -230,7 +374,7 @@ with gr.Blocks(css=css) as demo:
                     placeholder="Enter your prompt",
                     container=True,
                 )
-
+
                 lora_model = gr.Dropdown(
                     label="Select LoRA Model",
                     choices=list(lora_models.keys()),
@@ -241,6 +385,42 @@ with gr.Blocks(css=css) as demo:
 
             result = gr.Image(label="Result", show_label=False)
 
+        with gr.Accordion("Basic Settings", open=True):
+            with gr.Row():
+                strength = gr.Slider(
+                    label="Strength",
+                    minimum=0,
+                    maximum=1,
+                    step=0.01,
+                    value=0.85,
+                    info="Controls how much to modify the original image"
+                )
+
+                lora_scale = gr.Slider(
+                    label="LoRA Scale",
+                    minimum=0,
+                    maximum=2,
+                    step=0.05,
+                    value=0.75,
+                    info="Controls the influence of the LoRA model"
+                )
+
+            with gr.Row():
+                scale_factor = gr.Slider(
+                    label="Image Scale Factor",
+                    minimum=0.5,
+                    maximum=2.0,
+                    step=0.1,
+                    value=1.0,
+                    info="Scale factor for image dimensions (1.0 = original, 2.0 = double size)"
+                )
+
+            use_layered_diffusion = gr.Checkbox(
+                label="Use Layered Diffusion",
+                value=True,
+                info="Process in structure and texture layers for better quality"
+            )
+
         with gr.Accordion("Advanced Settings", open=False):
             with gr.Row():
                 seed = gr.Slider(
@@ -254,7 +434,7 @@ with gr.Blocks(css=css) as demo:
 
             with gr.Row():
                 guidance_scale = gr.Slider(
-                    label="Guidance Scale",
+                    label="Guidance Scale (Standard Mode)",
                     minimum=1,
                     maximum=30,
                     step=0.5,
@@ -262,46 +442,90 @@ with gr.Blocks(css=css) as demo:
                 )
 
                 num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
+                    label="Inference Steps (Standard Mode)",
                     minimum=1,
                     maximum=50,
                     step=1,
                     value=28,
                 )
-
+
            with gr.Row():
-                strength = gr.Slider(
-                    label="Strength",
+                blur_mask = gr.Slider(
+                    label="Mask Blur Radius",
                     minimum=0,
-                    maximum=1,
-                    step=0.01,
-                    value=0.85,
+                    maximum=30,
+                    step=1,
+                    value=8,
+                    info="Higher values create smoother transitions at mask boundaries"
                 )
 
-                lora_scale = gr.Slider(
-                    label="LoRA Scale",
-                    minimum=0,
-                    maximum=2,
-                    step=0.05,
-                    value=0.75,
-                    info="Controls the influence of the LoRA model"
-                )
-
-            with gr.Row():
-                scale_factor = gr.Slider(
-                    label="Image Scale Factor",
+                detail_level = gr.Slider(
+                    label="Detail Enhancement",
                     minimum=0.5,
                     maximum=2.0,
                     step=0.1,
-                    value=1.0,
-                    info="Scale factor for image dimensions (1.0 = original, 2.0 = double size)"
+                    value=1.2,
+                    info="Controls the sharpness of the final image"
                 )
+
+            # Layered diffusion settings
+            with gr.Column(visible=True, elem_id="layer-settings", elem_class="layer-settings") as layer_settings:
+                gr.Markdown("### Layered Diffusion Settings")
+
+                with gr.Row():
+                    structure_guidance = gr.Slider(
+                        label="Structure Guidance Scale",
+                        minimum=1.0,
+                        maximum=10.0,
+                        step=0.1,
+                        value=2.5,
+                        info="Controls adherence to prompt during structure generation (lower = more creative)"
+                    )
+
+                    texture_guidance = gr.Slider(
+                        label="Texture Guidance Scale",
+                        minimum=1.0,
+                        maximum=10.0,
+                        step=0.1,
+                        value=5.0,
+                        info="Controls adherence to prompt during texture refinement (higher = more detailed)"
+                    )
+
+                with gr.Row():
+                    structure_steps = gr.Slider(
+                        label="Structure Steps",
+                        minimum=10,
+                        maximum=40,
+                        step=1,
+                        value=20,
+                        info="Inference steps for structure generation"
+                    )
+
+                    texture_steps = gr.Slider(
+                        label="Texture Steps",
+                        minimum=10,
+                        maximum=40,
+                        step=1,
+                        value=15,
+                        info="Inference steps for texture refinement"
+                    )
+
+    # Toggle visibility of layered settings based on checkbox
+    use_layered_diffusion.change(
+        fn=toggle_layered_diffusion,
+        inputs=[use_layered_diffusion],
+        outputs=[layer_settings]
+    )
 
     gr.on(
         triggers=[run_button.click, prompt.submit],
         fn=infer,
-        inputs=[edit_image, prompt, lora_model, strength, seed, randomize_seed,
-                guidance_scale, num_inference_steps, lora_scale, scale_factor],
+        inputs=[
+            edit_image, prompt, lora_model, strength, seed, randomize_seed,
+            guidance_scale, num_inference_steps, lora_scale, scale_factor,
+            use_layered_diffusion, blur_mask, detail_level,
+            structure_guidance, texture_guidance, structure_steps, texture_steps
+        ],
        outputs=[result, seed]
    )
 
@@ -323,8 +547,12 @@ with gr.Blocks(css=css) as demo:
 
    save_button.click(
        fn=save_details,
-        inputs=[result, edit_image, prompt, lora_model, strength, seed, guidance_scale,
-                num_inference_steps, lora_scale, scale_factor],
+        inputs=[
+            result, edit_image, prompt, lora_model, strength, seed, guidance_scale,
+            num_inference_steps, lora_scale, scale_factor,
+            use_layered_diffusion, blur_mask, detail_level,
+            structure_guidance, texture_guidance, structure_steps, texture_steps
+        ],
        outputs=gr.File(label="Download/Save Status")
    )