Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -11,9 +11,6 @@ import uuid
 import gradio as gr
 import numpy as np
 from PIL import Image
-import torch
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-from transformers import CLIPTextModelWithProjection, CLIPTextModel
 from typing import Tuple
 import paramiko
 import datetime
@@ -21,6 +18,9 @@ from gradio import themes
 from image_gen_aux import UpscaleWithModel
 from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
+import torch
+from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
 
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
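A note on the reordering, not part of the commit itself: import torch now sits below the app-level imports but still above the two torch.backends flags, which is the only ordering that matters here. Those flags trade speed for exact fp32 matmul results; a minimal sketch of what they control, using only stock PyTorch switches:

import torch

# On Ampere and newer GPUs, TF32 runs fp32 matmuls on tensor cores with
# reduced mantissa precision; disabling it forces exact fp32 accumulation.
torch.backends.cuda.matmul.allow_tf32 = False
# Likewise keep bf16 matmul reductions in full precision.
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False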
@@ -156,6 +156,17 @@ def load_and_prepare_model():
 
 # Preload and compile both models
 pipe = load_and_prepare_model()
+
+# text models
+captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
+captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
+captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cuda', task="image-to-text")
+#model5 = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
+#model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
+#processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
+#processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
+
+
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
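The three captioner pipelines added above all use the image-to-text task; the commented-out model5/processor5 lines are heavier BLIP-2 and InstructBLIP alternatives left disabled. For context, a minimal sketch, not from the commit, of how such a pipeline behaves; the file name is a hypothetical stand-in. The pipeline accepts a PIL image and returns a list of dicts keyed by "generated_text":

from PIL import Image
from transformers import pipeline

# Same construction as captioner_2 above.
captioner = pipeline(task="image-to-text", model="Salesforce/blip-image-captioning-base", device="cuda")

image = Image.open("example.png").convert("RGB")  # hypothetical input file
result = captioner(image)                         # [{'generated_text': 'a photo of ...'}]
print(result[0]["generated_text"])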
@@ -229,29 +240,43 @@ def generate_30(
     if latent_file is not None: # Check if a latent file is provided
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         sd_image_a.resize((height,width), Image.LANCZOS)
+        caption=[]
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
         sd_image_b = Image.open(latent_file_2.name).convert('RGB')
         sd_image_b.resize((height,width), Image.LANCZOS)
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
     else:
         sd_image_b = None
     if latent_file_3 is not None: # Check if a latent file is provided
         sd_image_c = Image.open(latent_file_3.name).convert('RGB')
         sd_image_c.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_c = None
     if latent_file_4 is not None: # Check if a latent file is provided
         sd_image_d = Image.open(latent_file_4.name).convert('RGB')
         sd_image_d.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_d = None
     if latent_file_5 is not None: # Check if a latent file is provided
         sd_image_e = Image.open(latent_file_5.name).convert('RGB')
         sd_image_e.resize((height,width), Image.LANCZOS)
-    else:
+        caption.append(captioner(sd_image_a))
+        caption.append(captioner_2(sd_image_a))
+        caption.append(captioner_3(sd_image_a))
         sd_image_e = None
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
+    print(caption)
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
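Three issues in this hunk are worth flagging: every branch captions sd_image_a even when handling images b through e; the else: guards for images c, d, and e appear to be replaced by the caption calls, so those variables are reset to None immediately after loading; and Image.resize is called without assigning its result (PIL returns a new image and expects (width, height), not (height, width)). A hedged sketch of what the block appears to intend, using a hypothetical load_and_caption helper and the captioner names from this commit:

from PIL import Image

def load_and_caption(latent_file, captioners, captions, width, height):
    # Hypothetical helper: load one optional upload, resize it, and caption it
    # with each pipeline; return None when no file was provided.
    if latent_file is None:
        return None
    image = Image.open(latent_file.name).convert('RGB')
    # resize() returns a new image and takes (width, height); assign it back.
    image = image.resize((width, height), Image.LANCZOS)
    for captioner_fn in captioners:
        captions.append(captioner_fn(image))  # caption this image, not image A
    return image

# Usage inside generate_30, assuming its existing parameters and the three
# module-level pipelines:
caption = []
models = [captioner, captioner_2, captioner_3]
sd_image_a = load_and_caption(latent_file, models, caption, width, height)
sd_image_b = load_and_caption(latent_file_2, models, caption, width, height)
sd_image_c = load_and_caption(latent_file_3, models, caption, width, height)
sd_image_d = load_and_caption(latent_file_4, models, caption, width, height)
sd_image_e = load_and_caption(latent_file_5, models, caption, width, height)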