Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -239,13 +239,13 @@ def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
 @torch.no_grad()
 def captioning(img):
     prompts_array = [
-
-
-
-
-
-
-
+        # "Adjectives describing this scene are:",
+        # "The color scheme of this image is",
+        # "This scene could be described in detail as",
+        # "The characters in this scene are",
+        # "The larger details in this scene include",
+        # "The smaller details in this scene include",
+        # "The feeling this scene seems like",
         "The setting of this scene must be located",
         # Add more prompts here
     ]
@@ -259,18 +259,18 @@ def captioning(img):
     # Loop through prompts array:
     for prompt in prompts_array:
         inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
-        generated_ids = model5.generate(**inputs, min_length=
+        generated_ids = model5.generate(**inputs, min_length=32, max_length=96) # Adjust max_length if needed
         generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
         response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
         output_prompt.append(response_text)
         print(f"{response_text}\n") # Print only the response text
-
-
-
-
-
-
-
+        # Continue conversation:
+        inputf = processor5(images=img, text=generated_text + ' So therefore, ', return_tensors="pt").to('cuda')
+        generated_ids = model5.generate(**inputf, min_length=32, max_length=96)
+        generated_texta = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+        response_text = generated_texta.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
+        print(response_text)
+        output_prompt.append(response_text)
     print(output_prompt)
     return output_prompt
 
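Review note: the new loop answers each seed prompt, then feeds the whole first answer back to the captioner with the connective "So therefore, " to coax out a second, more detailed sentence. A minimal standalone sketch of that two-pass pattern, assuming `processor5`/`model5` are a BLIP-style captioning pair (the actual checkpoint used by app.py is not shown in this diff; the model name below is illustrative):

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Assumed stand-ins for app.py's processor5/model5.
processor5 = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model5 = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda")

@torch.no_grad()
def caption_twice(img: Image.Image, prompt: str) -> list[str]:
    out = []
    # Pass 1: complete the seed prompt, then strip the prompt from the decode.
    inputs = processor5(images=img, text=prompt, return_tensors="pt").to("cuda")
    ids = model5.generate(**inputs, min_length=32, max_length=96)
    text = processor5.batch_decode(ids, skip_special_tokens=True)[0].strip()
    out.append(text.replace(prompt, "").strip())
    # Pass 2: feed the full first answer back with a connective to elicit more detail.
    follow = processor5(images=img, text=text + " So therefore, ", return_tensors="pt").to("cuda")
    ids = model5.generate(**follow, min_length=32, max_length=96)
    more = processor5.batch_decode(ids, skip_special_tokens=True)[0].strip()
    out.append(more.replace(text, "").strip())  # keep only the continuation
    return out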
@@ -300,7 +300,7 @@ def expand_prompt(prompt):
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=
+        max_new_tokens=1024,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
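Review note: `max_new_tokens=1024` bounds only the freshly generated tokens, so the prompt length no longer counts against the budget (unlike `max_length`). A sketch of the configured sampling call; `model`/`txt_tokenizer` appear elsewhere in app.py but their loading is not part of this diff, so treat the surrounding lines as assumptions:

# Hedged sketch: only the generate kwargs mirror the diff.
inputs = txt_tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1024,   # cap on newly generated tokens only
    temperature=0.2,       # low temperature: near-deterministic expansions
    top_p=0.9,             # nucleus sampling
    do_sample=True,
)
expanded = txt_tokenizer.decode(outputs[0], skip_special_tokens=True)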
@@ -376,6 +376,9 @@ def generate_30(
     caption_2=[]
     #caption.append(captioner(sd_image_a))
     caption.append(captioner2(sd_image_a))
+    cap = captioner2(sd_image_a)  # sd_image_a: sd_image_b is not defined yet at this point
+    caption.append(cap)
+    print(cap)
     #caption.append(captioner_3(sd_image_a))
     caption_2.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
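Review note: the same three-line caption/append/print block is repeated for every image in generate_30, generate_60, and generate_90 (see the hunks that follow). A small helper could collapse the pattern; `caption_image` is a hypothetical name, not from app.py:

def caption_image(img, caption, caption_2):
    cap = captioner2(img)              # short free-form caption
    caption.append(cap)
    print(cap)
    caption_2.append(captioning(img))  # prompt-guided captions

# Usage at each branch: caption_image(sd_image_b, caption, caption_2)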
@@ -383,7 +386,9 @@ def generate_30(
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_b))
-
+        cap = captioner2(sd_image_b)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_b))
         caption_2.append(captioning(sd_image_b))
     else:
@@ -393,7 +398,9 @@ def generate_30(
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_c))
-
+        cap = captioner2(sd_image_c)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_c))
         caption_2.append(captioning(sd_image_c))
     else:
@@ -403,7 +410,9 @@ def generate_30(
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_d))
-
+        cap = captioner2(sd_image_d)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_d))
         caption_2.append(captioning(sd_image_d))
     else:
@@ -413,7 +422,9 @@ def generate_30(
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_e))
-
+        cap = captioner2(sd_image_e)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_e))
         caption_2.append(captioning(sd_image_e))
     else:
@@ -442,11 +453,10 @@ def generate_30(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-
-
-
-
-    #del txt_tokenizer
+    global model
+    global txt_tokenizer
+    del model
+    del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
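Review note: this hunk replaces the commented-out `#del txt_tokenizer` with an explicit release of the prompt-expansion model before the diffusion encoders are loaded. Two caveats apply: Python raises a SyntaxError if a name is referenced in a function before its `global` declaration, so these lines must precede any earlier use of `model`/`txt_tokenizer` in the same function, and CUDA memory is only returned once no tensor references remain anywhere. A sketch of the pattern (not app.py verbatim):

import gc
import torch

def release_text_model():
    # global must come before any use of these names in this function body.
    global model, txt_tokenizer
    del model, txt_tokenizer
    gc.collect()               # collect the now-unreferenced Python objects
    torch.cuda.empty_cache()   # release cached CUDA blocks back to the driver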
@@ -529,6 +539,9 @@ def generate_60(
     caption_2=[]
     #caption.append(captioner(sd_image_a))
     caption.append(captioner2(sd_image_a))
+    cap = captioner2(sd_image_a)  # sd_image_a: sd_image_b is not defined yet at this point
+    caption.append(cap)
+    print(cap)
     #caption.append(captioner_3(sd_image_a))
     caption_2.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
@@ -536,7 +549,9 @@ def generate_60(
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_b))
-
+        cap = captioner2(sd_image_b)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_b))
         caption_2.append(captioning(sd_image_b))
     else:
@@ -546,7 +561,9 @@ def generate_60(
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_c))
-
+        cap = captioner2(sd_image_c)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_c))
         caption_2.append(captioning(sd_image_c))
     else:
@@ -556,7 +573,9 @@ def generate_60(
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_d))
-
+        cap = captioner2(sd_image_d)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_d))
         caption_2.append(captioning(sd_image_d))
     else:
@@ -566,7 +585,9 @@ def generate_60(
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_e))
-
+        cap = captioner2(sd_image_e)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_e))
         caption_2.append(captioning(sd_image_e))
     else:
@@ -595,11 +616,10 @@ def generate_60(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-
-
-
-
-    #del txt_tokenizer
+    global model
+    global txt_tokenizer
+    del model
+    del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1
@@ -682,6 +702,9 @@ def generate_90(
     caption_2=[]
     #caption.append(captioner(sd_image_a))
     caption.append(captioner2(sd_image_a))
+    cap = captioner2(sd_image_a)  # sd_image_a: sd_image_b is not defined yet at this point
+    caption.append(cap)
+    print(cap)
     #caption.append(captioner_3(sd_image_a))
     caption_2.append(captioning(sd_image_a))
     if latent_file_2 is not None: # Check if a latent file is provided
@@ -689,7 +712,9 @@ def generate_90(
         #sd_image_b.resize((height,width), Image.LANCZOS)
         sd_image_b.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_b))
-
+        cap = captioner2(sd_image_b)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_b))
         caption_2.append(captioning(sd_image_b))
     else:
@@ -699,7 +724,9 @@ def generate_90(
         #sd_image_c.resize((height,width), Image.LANCZOS)
         sd_image_c.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_c))
-
+        cap = captioner2(sd_image_c)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_c))
         caption_2.append(captioning(sd_image_c))
     else:
@@ -709,7 +736,9 @@ def generate_90(
         #sd_image_d.resize((height,width), Image.LANCZOS)
         sd_image_d.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_d))
-
+        cap = captioner2(sd_image_d)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_d))
         caption_2.append(captioning(sd_image_d))
     else:
@@ -719,7 +748,9 @@ def generate_90(
         #sd_image_e.resize((height,width), Image.LANCZOS)
         sd_image_e.resize((768,768), Image.LANCZOS)
         #caption.append(captioner(sd_image_e))
-
+        cap = captioner2(sd_image_e)
+        caption.append(cap)
+        print(cap)
         #caption.append(captioner_3(sd_image_e))
         caption_2.append(captioning(sd_image_e))
     else:
@@ -748,11 +779,10 @@ def generate_90(
     print(new_prompt)
     print("-- FINAL PROMPT --")
     print("-- ------------ --")
-
-
-
-
-    #del txt_tokenizer
+    global model
+    global txt_tokenizer
+    del model
+    del txt_tokenizer
     gc.collect()
     torch.cuda.empty_cache()
     global text_encoder_1