Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
# in the Software without restriction, including without limitation the rights
|
5 |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
6 |
# copies of the Software, and to permit persons to whom the Software is
|
|
|
7 |
import spaces
|
8 |
import os
|
9 |
import random
|
@@ -20,7 +21,7 @@ from ip_adapter import IPAdapterXL
|
|
20 |
from huggingface_hub import snapshot_download
|
21 |
import torch
|
22 |
from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
|
23 |
-
from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
|
24 |
|
25 |
torch.backends.cuda.matmul.allow_tf32 = False
|
26 |
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
|
@@ -165,7 +166,9 @@ captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cu
|
|
165 |
#model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
|
166 |
#processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
|
167 |
#processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
|
168 |
-
|
|
|
|
|
169 |
|
170 |
ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
|
171 |
text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
|
@@ -175,6 +178,27 @@ MAX_SEED = np.iinfo(np.int32).max
|
|
175 |
|
176 |
neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
|
177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
def upload_to_ftp(filename):
|
179 |
try:
|
180 |
transport = paramiko.Transport((FTP_HOST, 22))
|
@@ -277,6 +301,62 @@ def generate_30(
|
|
277 |
filename= f'rv_IP_{timestamp}.png'
|
278 |
print("-- using image file --")
|
279 |
print(caption)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
print('-- generating image --')
|
281 |
sd_image = ip_model.generate(
|
282 |
pil_image_1=sd_image_a,
|
|
|
4 |
# in the Software without restriction, including without limitation the rights
|
5 |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
6 |
# copies of the Software, and to permit persons to whom the Software is
|
7 |
+
|
8 |
import spaces
|
9 |
import os
|
10 |
import random
|
|
|
21 |
from huggingface_hub import snapshot_download
|
22 |
import torch
|
23 |
from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
|
24 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
|
25 |
|
26 |
torch.backends.cuda.matmul.allow_tf32 = False
|
27 |
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
|
|
|
166 |
#model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
|
167 |
#processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
|
168 |
#processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
|
169 |
+
txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
|
170 |
+
txt_tokenizer.tokenizer_legacy=False
|
171 |
+
model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
|
172 |
|
173 |
ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
|
174 |
text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
|
|
|
178 |
|
179 |
neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
|
180 |
|
181 |
+
def filter_text(text,phraseC):
|
182 |
+
"""Filters out the text up to and including 'Rewritten Prompt:'."""
|
183 |
+
phrase = "Rewritten Prompt:"
|
184 |
+
phraseB = "rewritten text:"
|
185 |
+
pattern = f"(.*?){re.escape(phrase)}(.*)"
|
186 |
+
patternB = f"(.*?){re.escape(phraseB)}(.*)"
|
187 |
+
# matchB = re.search(patternB, text)
|
188 |
+
matchB = re.search(patternB, text, flags=re.DOTALL)
|
189 |
+
if matchB:
|
190 |
+
filtered_text = matchB.group(2)
|
191 |
+
match = re.search(pattern, filtered_text, flags=re.DOTALL)
|
192 |
+
if match:
|
193 |
+
filtered_text = match.group(2)
|
194 |
+
filtered_text = re.sub(phraseC, "", filtered_text, flags=re.DOTALL) # Replaces the matched pattern with an empty string
|
195 |
+
return filtered_text
|
196 |
+
else:
|
197 |
+
return filtered_text
|
198 |
+
else:
|
199 |
+
# Handle the case where no match is found
|
200 |
+
return text
|
201 |
+
|
202 |
def upload_to_ftp(filename):
|
203 |
try:
|
204 |
transport = paramiko.Transport((FTP_HOST, 22))
|
|
|
301 |
filename= f'rv_IP_{timestamp}.png'
|
302 |
print("-- using image file --")
|
303 |
print(caption)
|
304 |
+
print("-- generating further caption --")
|
305 |
+
|
306 |
+
|
307 |
+
system_prompt_rewrite = (
|
308 |
+
"You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
|
309 |
+
)
|
310 |
+
user_prompt_rewrite = (
|
311 |
+
"Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
|
312 |
+
)
|
313 |
+
user_prompt_rewrite_2 = (
|
314 |
+
"Rephrase this scene to have more elaborate details: "
|
315 |
+
)
|
316 |
+
input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
|
317 |
+
input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
|
318 |
+
print("-- got prompt --")
|
319 |
+
# Encode the input text and include the attention mask
|
320 |
+
encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
|
321 |
+
encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
|
322 |
+
# Ensure all values are on the correct device
|
323 |
+
input_ids = encoded_inputs["input_ids"].to(device)
|
324 |
+
input_ids_2 = encoded_inputs_2["input_ids"].to(device)
|
325 |
+
attention_mask = encoded_inputs["attention_mask"].to(device)
|
326 |
+
attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
|
327 |
+
print("-- tokenize prompt --")
|
328 |
+
# Google T5
|
329 |
+
#input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
330 |
+
outputs = model.generate(
|
331 |
+
input_ids=input_ids,
|
332 |
+
attention_mask=attention_mask,
|
333 |
+
max_new_tokens=512,
|
334 |
+
temperature=0.2,
|
335 |
+
top_p=0.9,
|
336 |
+
do_sample=True,
|
337 |
+
)
|
338 |
+
outputs_2 = model.generate(
|
339 |
+
input_ids=input_ids_2,
|
340 |
+
attention_mask=attention_mask_2,
|
341 |
+
max_new_tokens=65,
|
342 |
+
temperature=0.2,
|
343 |
+
top_p=0.9,
|
344 |
+
do_sample=True,
|
345 |
+
)
|
346 |
+
# Use the encoded tensor 'text_inputs' here
|
347 |
+
enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
348 |
+
enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
|
349 |
+
print('-- generated prompt --')
|
350 |
+
enhanced_prompt = filter_text(enhanced_prompt,prompt)
|
351 |
+
enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
|
352 |
+
print('-- filtered prompt --')
|
353 |
+
print(enhanced_prompt)
|
354 |
+
print('-- filtered prompt 2 --')
|
355 |
+
print(enhanced_prompt_2)
|
356 |
+
|
357 |
+
|
358 |
+
|
359 |
+
|
360 |
print('-- generating image --')
|
361 |
sd_image = ip_model.generate(
|
362 |
pil_image_1=sd_image_a,
|