Update app.py
app.py CHANGED
@@ -23,7 +23,12 @@ from huggingface_hub import snapshot_download
 import gc
 import torch
 from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-from transformers import
+from transformers import CLIPTextModelWithProjection, CLIPTextModel
+#from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import Blip2Processor, Blip2ForConditionalGeneration
+from transformers import Phi3ForCausalLM
+from transformers import pipeline
+
 from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
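For orientation: the surviving InstructBLIP import is the pair that the captioning path below binds to processor5 and model5. A minimal loading sketch, assuming a Salesforce checkpoint (the model id actually used by app.py is not visible in this diff):

import torch
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration

ckpt = "Salesforce/instructblip-vicuna-7b"  # assumed checkpoint, not from this diff
processor5 = InstructBlipProcessor.from_pretrained(ckpt)
model5 = InstructBlipForConditionalGeneration.from_pretrained(
    ckpt, torch_dtype=torch.bfloat16
).to("cuda")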
@@ -229,7 +234,7 @@ def save_image(img):
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
     filename= f'IP_{timestamp}.txt'
     with open(filename, "w") as f:
-        f.write(f"Realvis 5.0 IP Adapter \n")
+        f.write(f"Realvis 5.0 IP Adapter Test B\n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
@@ -259,7 +264,7 @@ def captioning(img):
         **inputsa,
         do_sample=False,
         num_beams=5,
-        max_length=
+        max_length=128,
         min_length=1,
         top_p=0.9,
         repetition_penalty=1.5,
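The old max_length= had no value, which made the file unparseable; the commit pins it at 128 tokens. For context, a sketch of the generate() call these keywords belong to, with inputsa built the way the commented-out follow-up code in captioning() builds its inputs:

inputsa = processor5(images=img, text=prompt, return_tensors="pt").to("cuda")
generated_ids = model5.generate(
    **inputsa,
    do_sample=False,         # deterministic beam search
    num_beams=5,
    max_length=128,          # previously left unset, a syntax error
    min_length=1,
    top_p=0.9,               # inert while do_sample=False; retained from the original
    repetition_penalty=1.5,
)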
@@ -284,18 +289,10 @@ def captioning(img):
         length_penalty=1.0,
         temperature=1,
     )
-    # Adjust max_length if needed
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
     output_prompt.append(response_text)
     print(f"{response_text}\n") # Print only the response text
-    # Continue conversation:
-    # inputf = processor5(images=img, text=generated_text + 'So therefore', return_tensors="pt").to('cuda')
-    # generated_ids = model5.generate(**inputf, min_length=24, max_length=42)
-    # generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-    # response_text = generated_text.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
-    # print(response_text)
-    #output_prompt.append(response_text)
     print(output_prompt)
     return output_prompt
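A note on the surviving decode line: .replace(prompt, "") strips every occurrence of the prompt anywhere in the decoded text. The alternative already suggested in the inline comment removes only the leading echo, which is usually the safer choice:

# Keep only what follows the first occurrence of the prompt.
response_text = generated_text.split(prompt, 1)[-1].strip()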
@@ -389,8 +386,6 @@ def generate_30(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -398,62 +393,45 @@ def generate_30(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        #caption.append(captioner2(sd_image_a))
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        ##caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
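Two notes on lines this hunk keeps. PIL's Image.resize returns a new image rather than resizing in place, so the bare sd_image_a.resize((224,224), Image.LANCZOS) calls are no-ops as written. And if prompt arrives as a plain string from the Gradio textbox (an assumption; its type is not shown here), prompt+caption adds a str to a list and raises TypeError before flatten_and_stringify ever runs. A sketch of both fixes:

# resize() returns a new image; rebind the name so the 224x224 downscale sticks.
sd_image_a = sd_image_a.resize((224, 224), Image.LANCZOS)

# str + list raises TypeError; wrap the prompt so list concatenation is well defined.
captions = [prompt] + caption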
@@ -466,19 +444,12 @@ def generate_30(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
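The ordering around these lines is the Space's VRAM swap: drop the caption model, collect, release cached blocks, and only then move the SDXL text encoders onto the GPU. del alone only removes the Python reference; the memory becomes reusable once gc.collect() and torch.cuda.empty_cache() have run. Condensed, the pattern is:

del model5, processor5          # drop references to the caption model
gc.collect()                    # destroy the now-unreferenced objects
torch.cuda.empty_cache()        # return freed blocks to the CUDA allocator
pipe.text_encoder = text_encoder_1.to(device=device, dtype=torch.bfloat16)
pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)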
@@ -540,8 +511,6 @@ def generate_60(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -549,62 +518,45 @@ def generate_60(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        #caption.append(captioner2(sd_image_a))
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
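Unlike in generate_30, several of the captioner2 calls removed here were live code, not comments. Because the binding captioner2 = captioner_2 was itself commented out at the top of the function, uploading a third, fourth, or fifth reference image would have crashed before this commit:

caption.append(captioner2(sd_image_c))  # NameError: name 'captioner2' is not defined

so dropping these calls, along with the now-dangling del captioner2, is a bugfix rather than pure cleanup.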
@@ -617,19 +569,12 @@ def generate_60(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
@@ -691,8 +636,6 @@ def generate_90(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -700,63 +643,45 @@ def generate_90(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        cap=captioner2(sd_image_a)
-        caption.append(cap)
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
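generate_90 went further still: its first image was captioned twice, once via the live cap=captioner2(sd_image_a) pair and once via captioning(). With every branch now funneling through a single captioning() call per image, the five copy-pasted blocks also invite consolidation; a hedged sketch of an equivalent loop (names from this file; behavior identical apart from making the 224x224 resize actually apply):

latent_files = [latent_file, latent_file_2, latent_file_3, latent_file_4, latent_file_5]
sd_images = []
caption = []
for lf in latent_files:
    if lf is None:
        sd_images.append(None)
        continue
    img = Image.open(lf.name).convert('RGB').resize((224, 224), Image.LANCZOS)
    sd_images.append(img)
    caption.append(captioning(img))
sd_image_a, sd_image_b, sd_image_c, sd_image_d, sd_image_e = sd_images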
@@ -769,19 +694,12 @@ def generate_90(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,