1inkusFace committed on
Commit
d793d7f
·
verified ·
1 Parent(s): bdc3a73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -114
app.py CHANGED
@@ -23,7 +23,12 @@ from huggingface_hub import snapshot_download
23
  import gc
24
  import torch
25
  from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
26
- from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline, Phi3ForCausalLM
 
 
 
 
 
27
  from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
28
  torch.backends.cuda.matmul.allow_tf32 = False
29
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -229,7 +234,7 @@ def save_image(img):
229
  def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
230
  filename= f'IP_{timestamp}.txt'
231
  with open(filename, "w") as f:
232
- f.write(f"Realvis 5.0 IP Adapter \n")
233
  f.write(f"Date/time: {timestamp} \n")
234
  f.write(f"Prompt: {prompt} \n")
235
  f.write(f"Steps: {num_inference_steps} \n")
@@ -259,7 +264,7 @@ def captioning(img):
259
  **inputsa,
260
  do_sample=False,
261
  num_beams=5,
262
- max_length=256,
263
  min_length=1,
264
  top_p=0.9,
265
  repetition_penalty=1.5,
@@ -284,18 +289,10 @@ def captioning(img):
284
  length_penalty=1.0,
285
  temperature=1,
286
  )
287
- # Adjust max_length if needed
288
  generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
289
  response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
290
  output_prompt.append(response_text)
291
  print(f"{response_text}\n") # Print only the response text
292
- # Continue conversation:
293
- # inputf = processor5(images=img, text=generated_text + 'So therefore', return_tensors="pt").to('cuda')
294
- # generated_ids = model5.generate(**inputf, min_length=24, max_length=42)
295
- # generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
296
- # response_text = generated_text.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
297
- # print(response_text)
298
- #output_prompt.append(response_text)
299
  print(output_prompt)
300
  return output_prompt
301
 
@@ -389,8 +386,6 @@ def generate_30(
389
  samples=1,
390
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
391
  ):
392
- #global captioner_2
393
- #captioner2=captioner_2
394
  seed = random.randint(0, MAX_SEED)
395
  generator = torch.Generator(device='cuda').manual_seed(seed)
396
  if latent_file is not None: # Check if a latent file is provided
@@ -398,62 +393,45 @@ def generate_30(
398
  sd_image_a.resize((224,224), Image.LANCZOS)
399
  #sd_image_a.resize((height,width), Image.LANCZOS)
400
  caption=[]
401
- caption_2=[]
402
- #caption.append(captioner(sd_image_a))
403
- #caption.append(captioner2(sd_image_a))
404
- #caption.append(captioner_3(sd_image_a))
405
- caption_2.append(captioning(sd_image_a))
406
  if latent_file_2 is not None: # Check if a latent file is provided
407
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
408
  #sd_image_b.resize((height,width), Image.LANCZOS)
409
  sd_image_b.resize((224,224), Image.LANCZOS)
410
- #caption.append(captioner(sd_image_b))
411
- ##caption.append(captioner2(sd_image_b))
412
- #caption.append(captioner_3(sd_image_b))
413
- caption_2.append(captioning(sd_image_b))
414
  else:
415
  sd_image_b = None
416
  if latent_file_3 is not None: # Check if a latent file is provided
417
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
418
  #sd_image_c.resize((height,width), Image.LANCZOS)
419
  sd_image_c.resize((224,224), Image.LANCZOS)
420
- #caption.append(captioner(sd_image_c))
421
- #caption.append(captioner2(sd_image_c))
422
- #caption.append(captioner_3(sd_image_c))
423
- caption_2.append(captioning(sd_image_c))
424
  else:
425
  sd_image_c = None
426
  if latent_file_4 is not None: # Check if a latent file is provided
427
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
428
  #sd_image_d.resize((height,width), Image.LANCZOS)
429
  sd_image_d.resize((224,224), Image.LANCZOS)
430
- #caption.append(captioner(sd_image_d))
431
- #caption.append(captioner2(sd_image_d))
432
- #caption.append(captioner_3(sd_image_d))
433
- caption_2.append(captioning(sd_image_d))
434
  else:
435
  sd_image_d = None
436
  if latent_file_5 is not None: # Check if a latent file is provided
437
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
438
  #sd_image_e.resize((height,width), Image.LANCZOS)
439
  sd_image_e.resize((224,224), Image.LANCZOS)
440
- #caption.append(captioner(sd_image_e))
441
- #caption.append(captioner2(sd_image_e))
442
- #caption.append(captioner_3(sd_image_e))
443
- caption_2.append(captioning(sd_image_e))
444
  else:
445
  sd_image_e = None
446
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
447
- filename= f'rv_IP_{timestamp}.png'
448
  print("-- using image file --")
449
- captions =prompt+ caption+caption_2
450
  captions = flatten_and_stringify(captions)
451
  captions = " ".join(captions)
452
  print(captions)
453
- print("-- generating further caption --")
454
  global model5
455
  global processor5
456
- del captioner2
457
  del model5
458
  del processor5
459
  gc.collect()
@@ -466,19 +444,12 @@ def generate_30(
466
  print(new_prompt)
467
  print("-- FINAL PROMPT --")
468
  print("-- ------------ --")
469
- #global model
470
- #global txt_tokenizer
471
- #del model
472
- #del txt_tokenizer
473
  gc.collect()
474
  torch.cuda.empty_cache()
475
  global text_encoder_1
476
  global text_encoder_2
477
- #global unetX
478
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
479
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
480
- #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
481
- #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
482
  print('-- generating image --')
483
  sd_image = ip_model.generate(
484
  pil_image_1=sd_image_a,
@@ -540,8 +511,6 @@ def generate_60(
540
  samples=1,
541
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
542
  ):
543
- #global captioner_2
544
- #captioner2=captioner_2
545
  seed = random.randint(0, MAX_SEED)
546
  generator = torch.Generator(device='cuda').manual_seed(seed)
547
  if latent_file is not None: # Check if a latent file is provided
@@ -549,62 +518,45 @@ def generate_60(
549
  sd_image_a.resize((224,224), Image.LANCZOS)
550
  #sd_image_a.resize((height,width), Image.LANCZOS)
551
  caption=[]
552
- caption_2=[]
553
- #caption.append(captioner(sd_image_a))
554
- #caption.append(captioner2(sd_image_a))
555
- #caption.append(captioner_3(sd_image_a))
556
- caption_2.append(captioning(sd_image_a))
557
  if latent_file_2 is not None: # Check if a latent file is provided
558
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
559
  #sd_image_b.resize((height,width), Image.LANCZOS)
560
  sd_image_b.resize((224,224), Image.LANCZOS)
561
- #caption.append(captioner(sd_image_b))
562
- #caption.append(captioner2(sd_image_b))
563
- #caption.append(captioner_3(sd_image_b))
564
- caption_2.append(captioning(sd_image_b))
565
  else:
566
  sd_image_b = None
567
  if latent_file_3 is not None: # Check if a latent file is provided
568
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
569
  #sd_image_c.resize((height,width), Image.LANCZOS)
570
  sd_image_c.resize((224,224), Image.LANCZOS)
571
- #caption.append(captioner(sd_image_c))
572
- caption.append(captioner2(sd_image_c))
573
- #caption.append(captioner_3(sd_image_c))
574
- caption_2.append(captioning(sd_image_c))
575
  else:
576
  sd_image_c = None
577
  if latent_file_4 is not None: # Check if a latent file is provided
578
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
579
  #sd_image_d.resize((height,width), Image.LANCZOS)
580
  sd_image_d.resize((224,224), Image.LANCZOS)
581
- #caption.append(captioner(sd_image_d))
582
- caption.append(captioner2(sd_image_d))
583
- #caption.append(captioner_3(sd_image_d))
584
- caption_2.append(captioning(sd_image_d))
585
  else:
586
  sd_image_d = None
587
  if latent_file_5 is not None: # Check if a latent file is provided
588
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
589
  #sd_image_e.resize((height,width), Image.LANCZOS)
590
  sd_image_e.resize((224,224), Image.LANCZOS)
591
- #caption.append(captioner(sd_image_e))
592
- caption.append(captioner2(sd_image_e))
593
- #caption.append(captioner_3(sd_image_e))
594
- caption_2.append(captioning(sd_image_e))
595
  else:
596
  sd_image_e = None
597
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
598
- filename= f'rv_IP_{timestamp}.png'
599
  print("-- using image file --")
600
- captions =prompt+ caption+caption_2
601
  captions = flatten_and_stringify(captions)
602
  captions = " ".join(captions)
603
  print(captions)
604
- print("-- generating further caption --")
605
  global model5
606
  global processor5
607
- del captioner2
608
  del model5
609
  del processor5
610
  gc.collect()
@@ -617,19 +569,12 @@ def generate_60(
617
  print(new_prompt)
618
  print("-- FINAL PROMPT --")
619
  print("-- ------------ --")
620
- #global model
621
- #global txt_tokenizer
622
- #del model
623
- #del txt_tokenizer
624
  gc.collect()
625
  torch.cuda.empty_cache()
626
  global text_encoder_1
627
  global text_encoder_2
628
- #global unetX
629
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
630
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
631
- #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
632
- #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
633
  print('-- generating image --')
634
  sd_image = ip_model.generate(
635
  pil_image_1=sd_image_a,
@@ -691,8 +636,6 @@ def generate_90(
691
  samples=1,
692
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
693
  ):
694
- #global captioner_2
695
- #captioner2=captioner_2
696
  seed = random.randint(0, MAX_SEED)
697
  generator = torch.Generator(device='cuda').manual_seed(seed)
698
  if latent_file is not None: # Check if a latent file is provided
@@ -700,63 +643,45 @@ def generate_90(
700
  sd_image_a.resize((224,224), Image.LANCZOS)
701
  #sd_image_a.resize((height,width), Image.LANCZOS)
702
  caption=[]
703
- caption_2=[]
704
- #caption.append(captioner(sd_image_a))
705
- cap=captioner2(sd_image_a)
706
- caption.append(cap)
707
- #caption.append(captioner_3(sd_image_a))
708
- caption_2.append(captioning(sd_image_a))
709
  if latent_file_2 is not None: # Check if a latent file is provided
710
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
711
  #sd_image_b.resize((height,width), Image.LANCZOS)
712
  sd_image_b.resize((224,224), Image.LANCZOS)
713
- #caption.append(captioner(sd_image_b))
714
- #caption.append(captioner2(sd_image_b))
715
- #caption.append(captioner_3(sd_image_b))
716
- caption_2.append(captioning(sd_image_b))
717
  else:
718
  sd_image_b = None
719
  if latent_file_3 is not None: # Check if a latent file is provided
720
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
721
  #sd_image_c.resize((height,width), Image.LANCZOS)
722
  sd_image_c.resize((224,224), Image.LANCZOS)
723
- #caption.append(captioner(sd_image_c))
724
- caption.append(captioner2(sd_image_c))
725
- #caption.append(captioner_3(sd_image_c))
726
- caption_2.append(captioning(sd_image_c))
727
  else:
728
  sd_image_c = None
729
  if latent_file_4 is not None: # Check if a latent file is provided
730
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
731
  #sd_image_d.resize((height,width), Image.LANCZOS)
732
  sd_image_d.resize((224,224), Image.LANCZOS)
733
- #caption.append(captioner(sd_image_d))
734
- caption.append(captioner2(sd_image_d))
735
- #caption.append(captioner_3(sd_image_d))
736
- caption_2.append(captioning(sd_image_d))
737
  else:
738
  sd_image_d = None
739
  if latent_file_5 is not None: # Check if a latent file is provided
740
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
741
  #sd_image_e.resize((height,width), Image.LANCZOS)
742
  sd_image_e.resize((224,224), Image.LANCZOS)
743
- #caption.append(captioner(sd_image_e))
744
- caption.append(captioner2(sd_image_e))
745
- #caption.append(captioner_3(sd_image_e))
746
- caption_2.append(captioning(sd_image_e))
747
  else:
748
  sd_image_e = None
749
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
750
- filename= f'rv_IP_{timestamp}.png'
751
  print("-- using image file --")
752
- captions =prompt+ caption+caption_2
753
  captions = flatten_and_stringify(captions)
754
  captions = " ".join(captions)
755
  print(captions)
756
- print("-- generating further caption --")
757
  global model5
758
  global processor5
759
- del captioner2
760
  del model5
761
  del processor5
762
  gc.collect()
@@ -769,19 +694,12 @@ def generate_90(
769
  print(new_prompt)
770
  print("-- FINAL PROMPT --")
771
  print("-- ------------ --")
772
- #global model
773
- #global txt_tokenizer
774
- #del model
775
- #del txt_tokenizer
776
  gc.collect()
777
  torch.cuda.empty_cache()
778
  global text_encoder_1
779
  global text_encoder_2
780
- #global unetX
781
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
782
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
783
- #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
784
- #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
785
  print('-- generating image --')
786
  sd_image = ip_model.generate(
787
  pil_image_1=sd_image_a,
 
23
  import gc
24
  import torch
25
  from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
26
+ from transformers import CLIPTextModelWithProjection, CLIPTextModel
27
+ #from transformers import AutoTokenizer, AutoModelForCausalLM
28
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
29
+ from transformers import Phi3ForCausalLM
30
+ from transformers import pipeline
31
+
32
  from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
33
  torch.backends.cuda.matmul.allow_tf32 = False
34
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 
234
  def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
235
  filename= f'IP_{timestamp}.txt'
236
  with open(filename, "w") as f:
237
+ f.write(f"Realvis 5.0 IP Adapter Test B\n")
238
  f.write(f"Date/time: {timestamp} \n")
239
  f.write(f"Prompt: {prompt} \n")
240
  f.write(f"Steps: {num_inference_steps} \n")
 
264
  **inputsa,
265
  do_sample=False,
266
  num_beams=5,
267
+ max_length=128,
268
  min_length=1,
269
  top_p=0.9,
270
  repetition_penalty=1.5,
 
289
  length_penalty=1.0,
290
  temperature=1,
291
  )
 
292
  generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
293
  response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
294
  output_prompt.append(response_text)
295
  print(f"{response_text}\n") # Print only the response text
 
 
 
 
 
 
 
296
  print(output_prompt)
297
  return output_prompt
298
 
 
386
  samples=1,
387
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
388
  ):
 
 
389
  seed = random.randint(0, MAX_SEED)
390
  generator = torch.Generator(device='cuda').manual_seed(seed)
391
  if latent_file is not None: # Check if a latent file is provided
 
393
  sd_image_a.resize((224,224), Image.LANCZOS)
394
  #sd_image_a.resize((height,width), Image.LANCZOS)
395
  caption=[]
396
+ caption.append(captioning(sd_image_a))
 
 
 
 
397
  if latent_file_2 is not None: # Check if a latent file is provided
398
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
399
  #sd_image_b.resize((height,width), Image.LANCZOS)
400
  sd_image_b.resize((224,224), Image.LANCZOS)
401
+ caption.append(captioning(sd_image_b))
 
 
 
402
  else:
403
  sd_image_b = None
404
  if latent_file_3 is not None: # Check if a latent file is provided
405
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
406
  #sd_image_c.resize((height,width), Image.LANCZOS)
407
  sd_image_c.resize((224,224), Image.LANCZOS)
408
+ caption.append(captioning(sd_image_c))
 
 
 
409
  else:
410
  sd_image_c = None
411
  if latent_file_4 is not None: # Check if a latent file is provided
412
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
413
  #sd_image_d.resize((height,width), Image.LANCZOS)
414
  sd_image_d.resize((224,224), Image.LANCZOS)
415
+ caption.append(captioning(sd_image_d))
 
 
 
416
  else:
417
  sd_image_d = None
418
  if latent_file_5 is not None: # Check if a latent file is provided
419
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
420
  #sd_image_e.resize((height,width), Image.LANCZOS)
421
  sd_image_e.resize((224,224), Image.LANCZOS)
422
+ caption.append(captioning(sd_image_e))
 
 
 
423
  else:
424
  sd_image_e = None
425
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
426
+ filename= f'rv_IPb_{timestamp}.png'
427
  print("-- using image file --")
428
+ captions =prompt+caption
429
  captions = flatten_and_stringify(captions)
430
  captions = " ".join(captions)
431
  print(captions)
432
+ print("-- not generating further caption --")
433
  global model5
434
  global processor5
 
435
  del model5
436
  del processor5
437
  gc.collect()
 
444
  print(new_prompt)
445
  print("-- FINAL PROMPT --")
446
  print("-- ------------ --")
 
 
 
 
447
  gc.collect()
448
  torch.cuda.empty_cache()
449
  global text_encoder_1
450
  global text_encoder_2
 
451
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
452
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
 
 
453
  print('-- generating image --')
454
  sd_image = ip_model.generate(
455
  pil_image_1=sd_image_a,
 
511
  samples=1,
512
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
513
  ):
 
 
514
  seed = random.randint(0, MAX_SEED)
515
  generator = torch.Generator(device='cuda').manual_seed(seed)
516
  if latent_file is not None: # Check if a latent file is provided
 
518
  sd_image_a.resize((224,224), Image.LANCZOS)
519
  #sd_image_a.resize((height,width), Image.LANCZOS)
520
  caption=[]
521
+ caption.append(captioning(sd_image_a))
 
 
 
 
522
  if latent_file_2 is not None: # Check if a latent file is provided
523
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
524
  #sd_image_b.resize((height,width), Image.LANCZOS)
525
  sd_image_b.resize((224,224), Image.LANCZOS)
526
+ caption.append(captioning(sd_image_b))
 
 
 
527
  else:
528
  sd_image_b = None
529
  if latent_file_3 is not None: # Check if a latent file is provided
530
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
531
  #sd_image_c.resize((height,width), Image.LANCZOS)
532
  sd_image_c.resize((224,224), Image.LANCZOS)
533
+ caption.append(captioning(sd_image_c))
 
 
 
534
  else:
535
  sd_image_c = None
536
  if latent_file_4 is not None: # Check if a latent file is provided
537
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
538
  #sd_image_d.resize((height,width), Image.LANCZOS)
539
  sd_image_d.resize((224,224), Image.LANCZOS)
540
+ caption.append(captioning(sd_image_d))
 
 
 
541
  else:
542
  sd_image_d = None
543
  if latent_file_5 is not None: # Check if a latent file is provided
544
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
545
  #sd_image_e.resize((height,width), Image.LANCZOS)
546
  sd_image_e.resize((224,224), Image.LANCZOS)
547
+ caption.append(captioning(sd_image_e))
 
 
 
548
  else:
549
  sd_image_e = None
550
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
551
+ filename= f'rv_IPb_{timestamp}.png'
552
  print("-- using image file --")
553
+ captions =prompt+caption
554
  captions = flatten_and_stringify(captions)
555
  captions = " ".join(captions)
556
  print(captions)
557
+ print("-- not generating further caption --")
558
  global model5
559
  global processor5
 
560
  del model5
561
  del processor5
562
  gc.collect()
 
569
  print(new_prompt)
570
  print("-- FINAL PROMPT --")
571
  print("-- ------------ --")
 
 
 
 
572
  gc.collect()
573
  torch.cuda.empty_cache()
574
  global text_encoder_1
575
  global text_encoder_2
 
576
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
577
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
 
 
578
  print('-- generating image --')
579
  sd_image = ip_model.generate(
580
  pil_image_1=sd_image_a,
 
636
  samples=1,
637
  progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
638
  ):
 
 
639
  seed = random.randint(0, MAX_SEED)
640
  generator = torch.Generator(device='cuda').manual_seed(seed)
641
  if latent_file is not None: # Check if a latent file is provided
 
643
  sd_image_a.resize((224,224), Image.LANCZOS)
644
  #sd_image_a.resize((height,width), Image.LANCZOS)
645
  caption=[]
646
+ caption.append(captioning(sd_image_a))
 
 
 
 
 
647
  if latent_file_2 is not None: # Check if a latent file is provided
648
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
649
  #sd_image_b.resize((height,width), Image.LANCZOS)
650
  sd_image_b.resize((224,224), Image.LANCZOS)
651
+ caption.append(captioning(sd_image_b))
 
 
 
652
  else:
653
  sd_image_b = None
654
  if latent_file_3 is not None: # Check if a latent file is provided
655
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
656
  #sd_image_c.resize((height,width), Image.LANCZOS)
657
  sd_image_c.resize((224,224), Image.LANCZOS)
658
+ caption.append(captioning(sd_image_c))
 
 
 
659
  else:
660
  sd_image_c = None
661
  if latent_file_4 is not None: # Check if a latent file is provided
662
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
663
  #sd_image_d.resize((height,width), Image.LANCZOS)
664
  sd_image_d.resize((224,224), Image.LANCZOS)
665
+ caption.append(captioning(sd_image_d))
 
 
 
666
  else:
667
  sd_image_d = None
668
  if latent_file_5 is not None: # Check if a latent file is provided
669
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
670
  #sd_image_e.resize((height,width), Image.LANCZOS)
671
  sd_image_e.resize((224,224), Image.LANCZOS)
672
+ caption.append(captioning(sd_image_e))
 
 
 
673
  else:
674
  sd_image_e = None
675
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
676
+ filename= f'rv_IPb_{timestamp}.png'
677
  print("-- using image file --")
678
+ captions =prompt+caption
679
  captions = flatten_and_stringify(captions)
680
  captions = " ".join(captions)
681
  print(captions)
682
+ print("-- not generating further caption --")
683
  global model5
684
  global processor5
 
685
  del model5
686
  del processor5
687
  gc.collect()
 
694
  print(new_prompt)
695
  print("-- FINAL PROMPT --")
696
  print("-- ------------ --")
 
 
 
 
697
  gc.collect()
698
  torch.cuda.empty_cache()
699
  global text_encoder_1
700
  global text_encoder_2
 
701
  pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
702
  pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
 
 
703
  print('-- generating image --')
704
  sd_image = ip_model.generate(
705
  pil_image_1=sd_image_a,