Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -11,9 +11,6 @@ import uuid
 import gradio as gr
 import numpy as np
 from PIL import Image
-import torch
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-from transformers import CLIPTextModelWithProjection, CLIPTextModel
 from typing import Tuple
 import paramiko
 import datetime
@@ -21,6 +18,9 @@ from gradio import themes
 from image_gen_aux import UpscaleWithModel
 from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
+import torch
+from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
 
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
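A note on the reordering, not part of the commit itself: import torch now sits below the app-level imports but still above the two torch.backends flags, which is the only ordering that matters here. Those flags trade speed for exact fp32 matmul results; a minimal sketch of what they control, using only stock PyTorch switches:

import torch

# On Ampere and newer GPUs, TF32 runs fp32 matmuls on tensor cores with
# reduced mantissa precision; disabling it forces exact fp32 accumulation.
torch.backends.cuda.matmul.allow_tf32 = False
# Likewise keep bf16 matmul reductions in full precision.
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False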
@@ -156,6 +156,17 @@ def load_and_prepare_model():
 
 # Preload and compile both models
 pipe = load_and_prepare_model()
+
+# text models
+captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
+captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
+captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cuda', task="image-to-text")
+#model5 = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
+#model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
+#processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
+#processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
+
+
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
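The three captioner pipelines added above all use the image-to-text task; the commented-out model5/processor5 lines are heavier BLIP-2 and InstructBLIP alternatives left disabled. For context, a minimal sketch, not from the commit, of how such a pipeline behaves; the file name is a hypothetical stand-in. The pipeline accepts a PIL image and returns a list of dicts keyed by "generated_text":

from PIL import Image
from transformers import pipeline

# Same construction as captioner_2 above.
captioner = pipeline(task="image-to-text", model="Salesforce/blip-image-captioning-base", device="cuda")

image = Image.open("example.png").convert("RGB")  # hypothetical input file
result = captioner(image)                         # [{'generated_text': 'a photo of ...'}]
print(result[0]["generated_text"])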
@@ -229,29 +240,43 @@ def generate_30(
     if latent_file is not None: # Check if a latent file is provided
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         sd_image_a.resize((height,width), Image.LANCZOS)
+        caption=[]
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         sd_image_b.resize((height,width), Image.LANCZOS)
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         sd_image_c.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         sd_image_d.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         sd_image_e.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
+    print(caption)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
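Three issues in this hunk are worth flagging: every branch captions sd_image_a even when handling images b through e; the else: guards for images c, d, and e appear to be replaced by the caption calls, so those variables are reset to None immediately after loading; and Image.resize is called without assigning its result (PIL returns a new image and expects (width, height), not (height, width)). A hedged sketch of what the block appears to intend, using a hypothetical load_and_caption helper and the captioner names from this commit:

from PIL import Image

def load_and_caption(latent_file, captioners, captions, width, height):
    # Hypothetical helper: load one optional upload, resize it, and caption it
    # with each pipeline; return None when no file was provided.
    if latent_file is None:
        return None
    image = Image.open(latent_file.name).convert('RGB')
    # resize() returns a new image and takes (width, height); assign it back.
    image = image.resize((width, height), Image.LANCZOS)
    for captioner_fn in captioners:
        captions.append(captioner_fn(image))  # caption this image, not image A
    return image

# Usage inside generate_30, assuming its existing parameters and the three
# module-level pipelines:
caption = []
models = [captioner, captioner_2, captioner_3]
sd_image_a = load_and_caption(latent_file, models, caption, width, height)
sd_image_b = load_and_caption(latent_file_2, models, caption, width, height)
sd_image_c = load_and_caption(latent_file_3, models, caption, width, height)
sd_image_d = load_and_caption(latent_file_4, models, caption, width, height)
sd_image_e = load_and_caption(latent_file_5, models, caption, width, height)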