Update app.py
app.py
CHANGED
@@ -4,6 +4,7 @@
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
+
 import spaces
 import os
 import random
@@ -20,7 +21,7 @@ from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
 import torch
 from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
-from transformers import CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline
 
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -165,7 +166,9 @@ captioner_3 = pipeline(model="Salesforce/blip-image-captioning-large",device='cu
 #model5 = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b-coco").to('cuda')
 #processor5 = InstructBlipProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b").to(torch.bfloat16).to('cuda')
 #processor5 = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b-coco")
-
+txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False)
+txt_tokenizer.tokenizer_legacy=False
+model = AutoModelForCausalLM.from_pretrained(checkpoint).to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
 text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
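Review note: `tokenizer_legacy` is not an attribute that transformers tokenizers define, so the `txt_tokenizer.tokenizer_legacy=False` assignment is most likely a silent no-op. If the intent was the `legacy` switch of the Llama/T5-family slow tokenizers, that flag is passed as a `from_pretrained` kwarg instead. A minimal sketch of that reading (it assumes `checkpoint`, defined elsewhere in app.py, names a Llama/T5-family model):

    # Hedged alternative: `legacy` as a load-time kwarg, not an attribute.
    txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, add_prefix_space=False, legacy=False)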
@@ -175,6 +178,27 @@ MAX_SEED = np.iinfo(np.int32).max
 
 neg_prompt_2 = " 'non-photorealistic':1.5, 'unrealistic skin','unattractive face':1.3, 'low quality':1.1, ('dull color scheme', 'dull colors', 'digital noise':1.2),'amateurish', 'poorly drawn face':1.3, 'poorly drawn', 'distorted face', 'low resolution', 'simplistic' "
 
+def filter_text(text,phraseC):
+    """Filters out the text up to and including 'Rewritten Prompt:'."""
+    phrase = "Rewritten Prompt:"
+    phraseB = "rewritten text:"
+    pattern = f"(.*?){re.escape(phrase)}(.*)"
+    patternB = f"(.*?){re.escape(phraseB)}(.*)"
+    # matchB = re.search(patternB, text)
+    matchB = re.search(patternB, text, flags=re.DOTALL)
+    if matchB:
+        filtered_text = matchB.group(2)
+        match = re.search(pattern, filtered_text, flags=re.DOTALL)
+        if match:
+            filtered_text = match.group(2)
+            filtered_text = re.sub(phraseC, "", filtered_text, flags=re.DOTALL) # Replaces the matched pattern with an empty string
+            return filtered_text
+        else:
+            return filtered_text
+    else:
+        # Handle the case where no match is found
+        return text
+
 def upload_to_ftp(filename):
     try:
         transport = paramiko.Transport((FTP_HOST, 22))
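Review note on `filter_text`: a quick, hypothetical sanity check of the added helper (the sample string is made up; this assumes `re` is already imported earlier in app.py). One sharp edge: `phraseC` reaches `re.sub` unescaped, so a prompt containing regex metacharacters such as `(` or `*` can raise or misfire; `re.sub(re.escape(phraseC), ...)` would harden it.

    # Hypothetical check, not part of the commit:
    sample = "Sure, here is the rewritten text: A street, Rewritten Prompt: a cat on a sunlit street, high detail"
    print(filter_text(sample, "a cat"))  # prints "  on a sunlit street, high detail"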
@@ -277,6 +301,62 @@ def generate_30(
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
     print(caption)
+    print("-- generating further caption --")
+
+
+    system_prompt_rewrite = (
+        "You are an AI assistant that rewrites image prompts to be more descriptive and detailed."
+    )
+    user_prompt_rewrite = (
+        "Rewrite this prompt to be more descriptive and detailed and only return the rewritten text: "
+    )
+    user_prompt_rewrite_2 = (
+        "Rephrase this scene to have more elaborate details: "
+    )
+    input_text = f"{system_prompt_rewrite} {user_prompt_rewrite} {prompt}"
+    input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {prompt}"
+    print("-- got prompt --")
+    # Encode the input text and include the attention mask
+    encoded_inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True)
+    encoded_inputs_2 = tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True)
+    # Ensure all values are on the correct device
+    input_ids = encoded_inputs["input_ids"].to(device)
+    input_ids_2 = encoded_inputs_2["input_ids"].to(device)
+    attention_mask = encoded_inputs["attention_mask"].to(device)
+    attention_mask_2 = encoded_inputs_2["attention_mask"].to(device)
+    print("-- tokenize prompt --")
+    # Google T5
+    #input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
+    outputs = model.generate(
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        max_new_tokens=512,
+        temperature=0.2,
+        top_p=0.9,
+        do_sample=True,
+    )
+    outputs_2 = model.generate(
+        input_ids=input_ids_2,
+        attention_mask=attention_mask_2,
+        max_new_tokens=65,
+        temperature=0.2,
+        top_p=0.9,
+        do_sample=True,
+    )
+    # Use the encoded tensor 'text_inputs' here
+    enhanced_prompt = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    enhanced_prompt_2 = tokenizer.decode(outputs_2[0], skip_special_tokens=True)
+    print('-- generated prompt --')
+    enhanced_prompt = filter_text(enhanced_prompt,prompt)
+    enhanced_prompt_2 = filter_text(enhanced_prompt_2,prompt)
+    print('-- filtered prompt --')
+    print(enhanced_prompt)
+    print('-- filtered prompt 2 --')
+    print(enhanced_prompt_2)
+
+
+
+
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
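Review note: the added block calls `tokenizer(...)` and `tokenizer.decode(...)`, but this commit names the new tokenizer `txt_tokenizer`; unless a global `tokenizer` is already defined elsewhere in app.py, generate_30 will raise a NameError here. A sketch of the presumably intended wiring, which also folds the two near-identical generate calls into one hypothetical helper (`rewrite_prompt` is not in the commit):

    # Assumption: txt_tokenizer is the tokenizer the new code means to use.
    def rewrite_prompt(text, original_prompt, max_new_tokens=512):
        enc = txt_tokenizer(text, return_tensors="pt", return_attention_mask=True)
        out = model.generate(
            input_ids=enc["input_ids"].to(device),
            attention_mask=enc["attention_mask"].to(device),
            max_new_tokens=max_new_tokens,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
        )
        decoded = txt_tokenizer.decode(out[0], skip_special_tokens=True)
        return filter_text(decoded, original_prompt)

    enhanced_prompt = rewrite_prompt(input_text, prompt)
    enhanced_prompt_2 = rewrite_prompt(input_text_2, prompt, max_new_tokens=65)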