Update app.py
app.py CHANGED
@@ -23,7 +23,12 @@ from huggingface_hub import snapshot_download
 import gc
 import torch
 from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-from transformers import
+from transformers import CLIPTextModelWithProjection, CLIPTextModel
+#from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import Blip2Processor, Blip2ForConditionalGeneration
+from transformers import Phi3ForCausalLM
+from transformers import pipeline
+
 from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
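For orientation: the surviving InstructBLIP import is the pair that the captioning path below binds to processor5 and model5. A minimal loading sketch, assuming a Salesforce checkpoint (the model id actually used by app.py is not visible in this diff):

import torch
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration

ckpt = "Salesforce/instructblip-vicuna-7b"  # assumed checkpoint, not from this diff
processor5 = InstructBlipProcessor.from_pretrained(ckpt)
model5 = InstructBlipForConditionalGeneration.from_pretrained(
    ckpt, torch_dtype=torch.bfloat16
).to("cuda")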
@@ -229,7 +234,7 @@ def save_image(img):
 def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
     filename= f'IP_{timestamp}.txt'
     with open(filename, "w") as f:
-        f.write(f"Realvis 5.0 IP Adapter \n")
+        f.write(f"Realvis 5.0 IP Adapter Test B\n")
         f.write(f"Date/time: {timestamp} \n")
         f.write(f"Prompt: {prompt} \n")
         f.write(f"Steps: {num_inference_steps} \n")
@@ -259,7 +264,7 @@ def captioning(img):
         **inputsa,
         do_sample=False,
         num_beams=5,
-        max_length=
+        max_length=128,
         min_length=1,
         top_p=0.9,
         repetition_penalty=1.5,
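The old max_length= had no value, which made the file unparseable; the commit pins it at 128 tokens. For context, a sketch of the generate() call these keywords belong to, with inputsa built the way the commented-out follow-up code in captioning() builds its inputs:

inputsa = processor5(images=img, text=prompt, return_tensors="pt").to("cuda")
generated_ids = model5.generate(
    **inputsa,
    do_sample=False,         # deterministic beam search
    num_beams=5,
    max_length=128,          # previously left unset, a syntax error
    min_length=1,
    top_p=0.9,               # inert while do_sample=False; retained from the original
    repetition_penalty=1.5,
)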
@@ -284,18 +289,10 @@ def captioning(img):
         length_penalty=1.0,
         temperature=1,
     )
-    # Adjust max_length if needed
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
     output_prompt.append(response_text)
     print(f"{response_text}\n") # Print only the response text
-    # Continue conversation:
-    # inputf = processor5(images=img, text=generated_text + 'So therefore', return_tensors="pt").to('cuda')
-    # generated_ids = model5.generate(**inputf, min_length=24, max_length=42)
-    # generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-    # response_text = generated_text.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
-    # print(response_text)
-    #output_prompt.append(response_text)
     print(output_prompt)
     return output_prompt
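A note on the surviving decode line: .replace(prompt, "") strips every occurrence of the prompt anywhere in the decoded text. The alternative already suggested in the inline comment removes only the leading echo, which is usually the safer choice:

# Keep only what follows the first occurrence of the prompt.
response_text = generated_text.split(prompt, 1)[-1].strip()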
@@ -389,8 +386,6 @@ def generate_30(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -398,62 +393,45 @@ def generate_30(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        #caption.append(captioner2(sd_image_a))
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        ##caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
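Two notes on lines this hunk keeps. PIL's Image.resize returns a new image rather than resizing in place, so the bare sd_image_a.resize((224,224), Image.LANCZOS) calls are no-ops as written. And if prompt arrives as a plain string from the Gradio textbox (an assumption; its type is not shown here), prompt+caption adds a str to a list and raises TypeError before flatten_and_stringify ever runs. A sketch of both fixes:

# resize() returns a new image; rebind the name so the 224x224 downscale sticks.
sd_image_a = sd_image_a.resize((224, 224), Image.LANCZOS)

# str + list raises TypeError; wrap the prompt so list concatenation is well defined.
captions = [prompt] + caption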
@@ -466,19 +444,12 @@ def generate_30(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
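The ordering around these lines is the Space's VRAM swap: drop the caption model, collect, release cached blocks, and only then move the SDXL text encoders onto the GPU. del alone only removes the Python reference; the memory becomes reusable once gc.collect() and torch.cuda.empty_cache() have run. Condensed, the pattern is:

del model5, processor5          # drop references to the caption model
gc.collect()                    # destroy the now-unreferenced objects
torch.cuda.empty_cache()        # return freed blocks to the CUDA allocator
pipe.text_encoder = text_encoder_1.to(device=device, dtype=torch.bfloat16)
pipe.text_encoder_2 = text_encoder_2.to(device=device, dtype=torch.bfloat16)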
@@ -540,8 +511,6 @@ def generate_60(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -549,62 +518,45 @@ def generate_60(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        #caption.append(captioner2(sd_image_a))
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
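Unlike in generate_30, several of the captioner2 calls removed here were live code, not comments. Because the binding captioner2 = captioner_2 was itself commented out at the top of the function, uploading a third, fourth, or fifth reference image would have crashed before this commit:

caption.append(captioner2(sd_image_c))  # NameError: name 'captioner2' is not defined

so dropping these calls, along with the now-dangling del captioner2, is a bugfix rather than pure cleanup.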
@@ -617,19 +569,12 @@ def generate_60(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
@@ -691,8 +636,6 @@ def generate_90(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    #global captioner_2
-    #captioner2=captioner_2
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
@@ -700,63 +643,45 @@ def generate_90(
         sd_image_a.resize((224,224), Image.LANCZOS)
         #sd_image_a.resize((height,width), Image.LANCZOS)
         caption=[]
-
-        #caption.append(captioner(sd_image_a))
-        cap=captioner2(sd_image_a)
-        caption.append(cap)
-        #caption.append(captioner_3(sd_image_a))
-        caption_2.append(captioning(sd_image_a))
+        caption.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((224,224), Image.LANCZOS)
-
-        #caption.append(captioner2(sd_image_b))
-        #caption.append(captioner_3(sd_image_b))
-        caption_2.append(captioning(sd_image_b))
+        caption.append(captioning(sd_image_b))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_c))
-        #caption.append(captioner_3(sd_image_c))
-        caption_2.append(captioning(sd_image_c))
+        caption.append(captioning(sd_image_c))
     else:
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_d))
-        #caption.append(captioner_3(sd_image_d))
-        caption_2.append(captioning(sd_image_d))
+        caption.append(captioning(sd_image_d))
     else:
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((224,224), Image.LANCZOS)
-
-        caption.append(captioner2(sd_image_e))
-        #caption.append(captioner_3(sd_image_e))
-        caption_2.append(captioning(sd_image_e))
+        caption.append(captioning(sd_image_e))
     else:
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    filename= f'
+    filename= f'rv_IPb_{timestamp}.png'
     print("-- using image file --")
-    captions =prompt+
+    captions =prompt+caption
     captions = flatten_and_stringify(captions)
     captions = " ".join(captions)
     print(captions)
-    print("-- generating further caption --")
+    print("-- not generating further caption --")
     global model5
     global processor5
-    del captioner2
     del model5
     del processor5
     gc.collect()
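generate_90 went further still: its first image was captioned twice, once via the live cap=captioner2(sd_image_a) pair and once via captioning(). With every branch now funneling through a single captioning() call per image, the five copy-pasted blocks also invite consolidation; a hedged sketch of an equivalent loop (names from this file; behavior identical apart from making the 224x224 resize actually apply):

latent_files = [latent_file, latent_file_2, latent_file_3, latent_file_4, latent_file_5]
sd_images = []
caption = []
for lf in latent_files:
    if lf is None:
        sd_images.append(None)
        continue
    img = Image.open(lf.name).convert('RGB').resize((224, 224), Image.LANCZOS)
    sd_images.append(img)
    caption.append(captioning(img))
sd_image_a, sd_image_b, sd_image_c, sd_image_d, sd_image_e = sd_images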
@@ -769,19 +694,12 @@ def generate_90(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-    #global model
-    #global txt_tokenizer
-    #del model
-    #del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
-    #global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
-    #pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
-    #ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,