1inkusFace committed on
Commit
631e75c
·
verified ·
1 Parent(s): c0ef521

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -6
app.py CHANGED
@@ -11,9 +11,6 @@ import uuid
11
  import gradio as gr
12
  import numpy as np
13
  from PIL import Image
14
- import torch
15
- from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
16
- from transformers import CLIPTextModelWithProjection, CLIPTextModel
17
  from typing import Tuple
18
  import paramiko
19
  import datetime
@@ -21,6 +18,9 @@ from gradio import themes
21
  from image_gen_aux import UpscaleWithModel
22
  from ip_adapter import IPAdapterXL
23
  from huggingface_hub import snapshot_download
 
 
 
24
 
25
  torch.backends.cuda.matmul.allow_tf32 = False
26
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -156,6 +156,17 @@ def load_and_prepare_model():
156
 
157
  # Preload and compile both models
158
  pipe = load_and_prepare_model()
 
 
 
 
 
 
 
 
 
 
 
159
  ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
160
  text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
161
  text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
@@ -229,29 +240,43 @@ def generate_30(
229
  if latent_file is not None: # Check if a latent file is provided
230
  sd_image_a = Image.open(latent_file.name).convert('RGB')
231
  sd_image_a.resize((height,width), Image.LANCZOS)
 
 
 
 
232
  if latent_file_2 is not None: # Check if a latent file is provided
233
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
234
  sd_image_b.resize((height,width), Image.LANCZOS)
 
 
 
235
  else:
236
  sd_image_b = None
237
  if latent_file_3 is not None: # Check if a latent file is provided
238
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
239
  sd_image_c.resize((height,width), Image.LANCZOS)
240
- else:
 
 
241
  sd_image_c = None
242
  if latent_file_4 is not None: # Check if a latent file is provided
243
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
244
  sd_image_d.resize((height,width), Image.LANCZOS)
245
- else:
 
 
246
  sd_image_d = None
247
  if latent_file_5 is not None: # Check if a latent file is provided
248
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
249
  sd_image_e.resize((height,width), Image.LANCZOS)
250
- else:
 
 
251
  sd_image_e = None
252
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
253
  filename= f'rv_IP_{timestamp}.png'
254
  print("-- using image file --")
 
255
  print('-- generating image --')
256
  sd_image = ip_model.generate(
257
  pil_image_1=sd_image_a,
 
11
  import gradio as gr
12
  import numpy as np
13
  from PIL import Image
 
 
 
14
  from typing import Tuple
15
  import paramiko
16
  import datetime
 
18
  from image_gen_aux import UpscaleWithModel
19
  from ip_adapter import IPAdapterXL
20
  from huggingface_hub import snapshot_download
21
+ import torch
22
+ from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
23
+ from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
24
 
25
  torch.backends.cuda.matmul.allow_tf32 = False
26
  torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 
156
 
157
  # Preload and compile both models
158
  pipe = load_and_prepare_model()
159
+
160
+ # text models
161
+ captioner = pipeline(model="ydshieh/vit-gpt2-coco-en",device='cuda', task="image-to-text")
162
+ captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
163
+ captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cuda', task="image-to-text")
164
+ #model5 = InstructBlipForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
165
+ #model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
166
+ #processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
167
+ #processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
168
+
169
+
170
  ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
171
  text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
172
  text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
 
240
  if latent_file is not None: # Check if a latent file is provided
241
  sd_image_a = Image.open(latent_file.name).convert('RGB')
242
  sd_image_a.resize((height,width), Image.LANCZOS)
243
+ caption=[]
244
+ caption.append(captioner(sd_image_a))
245
+ caption.append(captioner_2(sd_image_a))
246
+ caption.append(captioner_3(sd_image_a))
247
  if latent_file_2 is not None: # Check if a latent file is provided
248
  sd_image_b = Image.open(latent_file_2.name).convert('RGB')
249
  sd_image_b.resize((height,width), Image.LANCZOS)
250
+ caption.append(captioner(sd_image_a))
251
+ caption.append(captioner_2(sd_image_a))
252
+ caption.append(captioner_3(sd_image_a))
253
  else:
254
  sd_image_b = None
255
  if latent_file_3 is not None: # Check if a latent file is provided
256
  sd_image_c = Image.open(latent_file_3.name).convert('RGB')
257
  sd_image_c.resize((height,width), Image.LANCZOS)
258
+ caption.append(captioner(sd_image_a))
259
+ caption.append(captioner_2(sd_image_a))
260
+ caption.append(captioner_3(sd_image_a)) else:
261
  sd_image_c = None
262
  if latent_file_4 is not None: # Check if a latent file is provided
263
  sd_image_d = Image.open(latent_file_4.name).convert('RGB')
264
  sd_image_d.resize((height,width), Image.LANCZOS)
265
+ caption.append(captioner(sd_image_a))
266
+ caption.append(captioner_2(sd_image_a))
267
+ caption.append(captioner_3(sd_image_a)) else:
268
  sd_image_d = None
269
  if latent_file_5 is not None: # Check if a latent file is provided
270
  sd_image_e = Image.open(latent_file_5.name).convert('RGB')
271
  sd_image_e.resize((height,width), Image.LANCZOS)
272
+ caption.append(captioner(sd_image_a))
273
+ caption.append(captioner_2(sd_image_a))
274
+ caption.append(captioner_3(sd_image_a)) else:
275
  sd_image_e = None
276
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
277
  filename= f'rv_IP_{timestamp}.png'
278
  print("-- using image file --")
279
+ print(caption)
280
  print('-- generating image --')
281
  sd_image = ip_model.generate(
282
  pil_image_1=sd_image_a,