1inkusFace committed on
Commit
c4a9c5d
·
verified ·
1 Parent(s): 1844b19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -43
app.py CHANGED
@@ -239,13 +239,13 @@ def uploadNote(prompt,num_inference_steps,guidance_scale,timestamp):
239
  @torch.no_grad()
240
  def captioning(img):
241
  prompts_array = [
242
- "Adjectives describing this scene are:",
243
- "The color scheme of this image is",
244
- "This scene could be described in detail as",
245
- "The characters in this scene are",
246
- "The larger details in this scene include",
247
- "The smaller details in this scene include",
248
- "The feeling this scene seems like",
249
  "The setting of this scene must be located",
250
  # Add more prompts here
251
  ]
@@ -259,18 +259,18 @@ def captioning(img):
259
  # Loop through prompts array:
260
  for prompt in prompts_array:
261
  inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
262
- generated_ids = model5.generate(**inputs, min_length=42, max_length=64) # Adjust max_length if needed
263
  generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
264
  response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
265
  output_prompt.append(response_text)
266
  print(f"{response_text}\n") # Print only the response text
267
- # Continue conversation:
268
- # inputf = processor5(images=img, text=generated_text + ' So therefore, ', return_tensors="pt").to('cuda')
269
- # generated_ids = model5.generate(**inputf, min_length=24, max_length=42)
270
- # generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
271
- # response_text = generated_text.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
272
- # print(response_text)
273
- #output_prompt.append(response_text)
274
  print(output_prompt)
275
  return output_prompt
276
 
@@ -300,7 +300,7 @@ def expand_prompt(prompt):
300
  outputs = model.generate(
301
  input_ids=input_ids,
302
  attention_mask=attention_mask,
303
- max_new_tokens=128,
304
  temperature=0.2,
305
  top_p=0.9,
306
  do_sample=True,
@@ -376,6 +376,9 @@ def generate_30(
376
  caption_2=[]
377
  #caption.append(captioner(sd_image_a))
378
  caption.append(captioner2(sd_image_a))
 
 
 
379
  #caption.append(captioner_3(sd_image_a))
380
  caption_2.append(captioning(sd_image_a))
381
  if latent_file_2 is not None: # Check if a latent file is provided
@@ -383,7 +386,9 @@ def generate_30(
383
  #sd_image_b.resize((height,width), Image.LANCZOS)
384
  sd_image_b.resize((768,768), Image.LANCZOS)
385
  #caption.append(captioner(sd_image_b))
386
- caption.append(captioner2(sd_image_b))
 
 
387
  #caption.append(captioner_3(sd_image_b))
388
  caption_2.append(captioning(sd_image_b))
389
  else:
@@ -393,7 +398,9 @@ def generate_30(
393
  #sd_image_c.resize((height,width), Image.LANCZOS)
394
  sd_image_c.resize((768,768), Image.LANCZOS)
395
  #caption.append(captioner(sd_image_c))
396
- caption.append(captioner2(sd_image_c))
 
 
397
  #caption.append(captioner_3(sd_image_c))
398
  caption_2.append(captioning(sd_image_c))
399
  else:
@@ -403,7 +410,9 @@ def generate_30(
403
  #sd_image_d.resize((height,width), Image.LANCZOS)
404
  sd_image_d.resize((768,768), Image.LANCZOS)
405
  #caption.append(captioner(sd_image_d))
406
- caption.append(captioner2(sd_image_d))
 
 
407
  #caption.append(captioner_3(sd_image_d))
408
  caption_2.append(captioning(sd_image_d))
409
  else:
@@ -413,7 +422,9 @@ def generate_30(
413
  #sd_image_e.resize((height,width), Image.LANCZOS)
414
  sd_image_e.resize((768,768), Image.LANCZOS)
415
  #caption.append(captioner(sd_image_e))
416
- caption.append(captioner2(sd_image_e))
 
 
417
  #caption.append(captioner_3(sd_image_e))
418
  caption_2.append(captioning(sd_image_e))
419
  else:
@@ -442,11 +453,10 @@ def generate_30(
442
  print(new_prompt)
443
  print("-- FINAL PROMPT --")
444
  print("-- ------------ --")
445
-
446
- #global model
447
- #global txt_tokenizer
448
- #del model
449
- #del txt_tokenizer
450
  gc.collect()
451
  torch.cuda.empty_cache()
452
  global text_encoder_1
@@ -529,6 +539,9 @@ def generate_60(
529
  caption_2=[]
530
  #caption.append(captioner(sd_image_a))
531
  caption.append(captioner2(sd_image_a))
 
 
 
532
  #caption.append(captioner_3(sd_image_a))
533
  caption_2.append(captioning(sd_image_a))
534
  if latent_file_2 is not None: # Check if a latent file is provided
@@ -536,7 +549,9 @@ def generate_60(
536
  #sd_image_b.resize((height,width), Image.LANCZOS)
537
  sd_image_b.resize((768,768), Image.LANCZOS)
538
  #caption.append(captioner(sd_image_b))
539
- caption.append(captioner2(sd_image_b))
 
 
540
  #caption.append(captioner_3(sd_image_b))
541
  caption_2.append(captioning(sd_image_b))
542
  else:
@@ -546,7 +561,9 @@ def generate_60(
546
  #sd_image_c.resize((height,width), Image.LANCZOS)
547
  sd_image_c.resize((768,768), Image.LANCZOS)
548
  #caption.append(captioner(sd_image_c))
549
- caption.append(captioner2(sd_image_c))
 
 
550
  #caption.append(captioner_3(sd_image_c))
551
  caption_2.append(captioning(sd_image_c))
552
  else:
@@ -556,7 +573,9 @@ def generate_60(
556
  #sd_image_d.resize((height,width), Image.LANCZOS)
557
  sd_image_d.resize((768,768), Image.LANCZOS)
558
  #caption.append(captioner(sd_image_d))
559
- caption.append(captioner2(sd_image_d))
 
 
560
  #caption.append(captioner_3(sd_image_d))
561
  caption_2.append(captioning(sd_image_d))
562
  else:
@@ -566,7 +585,9 @@ def generate_60(
566
  #sd_image_e.resize((height,width), Image.LANCZOS)
567
  sd_image_e.resize((768,768), Image.LANCZOS)
568
  #caption.append(captioner(sd_image_e))
569
- caption.append(captioner2(sd_image_e))
 
 
570
  #caption.append(captioner_3(sd_image_e))
571
  caption_2.append(captioning(sd_image_e))
572
  else:
@@ -595,11 +616,10 @@ def generate_60(
595
  print(new_prompt)
596
  print("-- FINAL PROMPT --")
597
  print("-- ------------ --")
598
-
599
- #global model
600
- #global txt_tokenizer
601
- #del model
602
- #del txt_tokenizer
603
  gc.collect()
604
  torch.cuda.empty_cache()
605
  global text_encoder_1
@@ -682,6 +702,9 @@ def generate_90(
682
  caption_2=[]
683
  #caption.append(captioner(sd_image_a))
684
  caption.append(captioner2(sd_image_a))
 
 
 
685
  #caption.append(captioner_3(sd_image_a))
686
  caption_2.append(captioning(sd_image_a))
687
  if latent_file_2 is not None: # Check if a latent file is provided
@@ -689,7 +712,9 @@ def generate_90(
689
  #sd_image_b.resize((height,width), Image.LANCZOS)
690
  sd_image_b.resize((768,768), Image.LANCZOS)
691
  #caption.append(captioner(sd_image_b))
692
- caption.append(captioner2(sd_image_b))
 
 
693
  #caption.append(captioner_3(sd_image_b))
694
  caption_2.append(captioning(sd_image_b))
695
  else:
@@ -699,7 +724,9 @@ def generate_90(
699
  #sd_image_c.resize((height,width), Image.LANCZOS)
700
  sd_image_c.resize((768,768), Image.LANCZOS)
701
  #caption.append(captioner(sd_image_c))
702
- caption.append(captioner2(sd_image_c))
 
 
703
  #caption.append(captioner_3(sd_image_c))
704
  caption_2.append(captioning(sd_image_c))
705
  else:
@@ -709,7 +736,9 @@ def generate_90(
709
  #sd_image_d.resize((height,width), Image.LANCZOS)
710
  sd_image_d.resize((768,768), Image.LANCZOS)
711
  #caption.append(captioner(sd_image_d))
712
- caption.append(captioner2(sd_image_d))
 
 
713
  #caption.append(captioner_3(sd_image_d))
714
  caption_2.append(captioning(sd_image_d))
715
  else:
@@ -719,7 +748,9 @@ def generate_90(
719
  #sd_image_e.resize((height,width), Image.LANCZOS)
720
  sd_image_e.resize((768,768), Image.LANCZOS)
721
  #caption.append(captioner(sd_image_e))
722
- caption.append(captioner2(sd_image_e))
 
 
723
  #caption.append(captioner_3(sd_image_e))
724
  caption_2.append(captioning(sd_image_e))
725
  else:
@@ -748,11 +779,10 @@ def generate_90(
748
  print(new_prompt)
749
  print("-- FINAL PROMPT --")
750
  print("-- ------------ --")
751
-
752
- #global model
753
- #global txt_tokenizer
754
- #del model
755
- #del txt_tokenizer
756
  gc.collect()
757
  torch.cuda.empty_cache()
758
  global text_encoder_1
 
239
  @torch.no_grad()
240
  def captioning(img):
241
  prompts_array = [
242
+ # "Adjectives describing this scene are:",
243
+ # "The color scheme of this image is",
244
+ # "This scene could be described in detail as",
245
+ # "The characters in this scene are",
246
+ # "The larger details in this scene include",
247
+ # "The smaller details in this scene include",
248
+ # "The feeling this scene seems like",
249
  "The setting of this scene must be located",
250
  # Add more prompts here
251
  ]
 
259
  # Loop through prompts array:
260
  for prompt in prompts_array:
261
  inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
262
+ generated_ids = model5.generate(**inputs, min_length=32, max_length=96) # Adjust max_length if needed
263
  generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
264
  response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
265
  output_prompt.append(response_text)
266
  print(f"{response_text}\n") # Print only the response text
267
+ # Continue conversation:
268
+ inputf = processor5(images=img, text=generated_text + ' So therefore, ', return_tensors="pt").to('cuda')
269
+ generated_ids = model5.generate(**inputf, min_length=32, max_length=96)
270
+ generated_texta = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
271
+ response_text = generated_texta.replace(generated_text, "").strip() # Remove the previous text plus 'So therefore'
272
+ print(response_text)
273
+ output_prompt.append(response_text)
274
  print(output_prompt)
275
  return output_prompt
276
 
 
300
  outputs = model.generate(
301
  input_ids=input_ids,
302
  attention_mask=attention_mask,
303
+ max_new_tokens=1024,
304
  temperature=0.2,
305
  top_p=0.9,
306
  do_sample=True,
 
376
  caption_2=[]
377
  #caption.append(captioner(sd_image_a))
378
  caption.append(captioner2(sd_image_a))
379
+ cap = captioner2(sd_image_b)
380
+ caption.append(cap)
381
+ print(cap)
382
  #caption.append(captioner_3(sd_image_a))
383
  caption_2.append(captioning(sd_image_a))
384
  if latent_file_2 is not None: # Check if a latent file is provided
 
386
  #sd_image_b.resize((height,width), Image.LANCZOS)
387
  sd_image_b.resize((768,768), Image.LANCZOS)
388
  #caption.append(captioner(sd_image_b))
389
+ cap = captioner2(sd_image_b)
390
+ caption.append(cap)
391
+ print(cap)
392
  #caption.append(captioner_3(sd_image_b))
393
  caption_2.append(captioning(sd_image_b))
394
  else:
 
398
  #sd_image_c.resize((height,width), Image.LANCZOS)
399
  sd_image_c.resize((768,768), Image.LANCZOS)
400
  #caption.append(captioner(sd_image_c))
401
+ cap = captioner2(sd_image_c)
402
+ caption.append(cap)
403
+ print(cap)
404
  #caption.append(captioner_3(sd_image_c))
405
  caption_2.append(captioning(sd_image_c))
406
  else:
 
410
  #sd_image_d.resize((height,width), Image.LANCZOS)
411
  sd_image_d.resize((768,768), Image.LANCZOS)
412
  #caption.append(captioner(sd_image_d))
413
+ cap = captioner2(sd_image_d)
414
+ caption.append(cap)
415
+ print(cap)
416
  #caption.append(captioner_3(sd_image_d))
417
  caption_2.append(captioning(sd_image_d))
418
  else:
 
422
  #sd_image_e.resize((height,width), Image.LANCZOS)
423
  sd_image_e.resize((768,768), Image.LANCZOS)
424
  #caption.append(captioner(sd_image_e))
425
+ cap = captioner2(sd_image_e)
426
+ caption.append(cap)
427
+ print(cap)
428
  #caption.append(captioner_3(sd_image_e))
429
  caption_2.append(captioning(sd_image_e))
430
  else:
 
453
  print(new_prompt)
454
  print("-- FINAL PROMPT --")
455
  print("-- ------------ --")
456
+ global model
457
+ global txt_tokenizer
458
+ del model
459
+ del txt_tokenizer
 
460
  gc.collect()
461
  torch.cuda.empty_cache()
462
  global text_encoder_1
 
539
  caption_2=[]
540
  #caption.append(captioner(sd_image_a))
541
  caption.append(captioner2(sd_image_a))
542
+ cap = captioner2(sd_image_b)
543
+ caption.append(cap)
544
+ print(cap)
545
  #caption.append(captioner_3(sd_image_a))
546
  caption_2.append(captioning(sd_image_a))
547
  if latent_file_2 is not None: # Check if a latent file is provided
 
549
  #sd_image_b.resize((height,width), Image.LANCZOS)
550
  sd_image_b.resize((768,768), Image.LANCZOS)
551
  #caption.append(captioner(sd_image_b))
552
+ cap = captioner2(sd_image_b)
553
+ caption.append(cap)
554
+ print(cap)
555
  #caption.append(captioner_3(sd_image_b))
556
  caption_2.append(captioning(sd_image_b))
557
  else:
 
561
  #sd_image_c.resize((height,width), Image.LANCZOS)
562
  sd_image_c.resize((768,768), Image.LANCZOS)
563
  #caption.append(captioner(sd_image_c))
564
+ cap = captioner2(sd_image_c)
565
+ caption.append(cap)
566
+ print(cap)
567
  #caption.append(captioner_3(sd_image_c))
568
  caption_2.append(captioning(sd_image_c))
569
  else:
 
573
  #sd_image_d.resize((height,width), Image.LANCZOS)
574
  sd_image_d.resize((768,768), Image.LANCZOS)
575
  #caption.append(captioner(sd_image_d))
576
+ cap = captioner2(sd_image_d)
577
+ caption.append(cap)
578
+ print(cap)
579
  #caption.append(captioner_3(sd_image_d))
580
  caption_2.append(captioning(sd_image_d))
581
  else:
 
585
  #sd_image_e.resize((height,width), Image.LANCZOS)
586
  sd_image_e.resize((768,768), Image.LANCZOS)
587
  #caption.append(captioner(sd_image_e))
588
+ cap = captioner2(sd_image_e)
589
+ caption.append(cap)
590
+ print(cap)
591
  #caption.append(captioner_3(sd_image_e))
592
  caption_2.append(captioning(sd_image_e))
593
  else:
 
616
  print(new_prompt)
617
  print("-- FINAL PROMPT --")
618
  print("-- ------------ --")
619
+ global model
620
+ global txt_tokenizer
621
+ del model
622
+ del txt_tokenizer
 
623
  gc.collect()
624
  torch.cuda.empty_cache()
625
  global text_encoder_1
 
702
  caption_2=[]
703
  #caption.append(captioner(sd_image_a))
704
  caption.append(captioner2(sd_image_a))
705
+ cap = captioner2(sd_image_b)
706
+ caption.append(cap)
707
+ print(cap)
708
  #caption.append(captioner_3(sd_image_a))
709
  caption_2.append(captioning(sd_image_a))
710
  if latent_file_2 is not None: # Check if a latent file is provided
 
712
  #sd_image_b.resize((height,width), Image.LANCZOS)
713
  sd_image_b.resize((768,768), Image.LANCZOS)
714
  #caption.append(captioner(sd_image_b))
715
+ cap = captioner2(sd_image_b)
716
+ caption.append(cap)
717
+ print(cap)
718
  #caption.append(captioner_3(sd_image_b))
719
  caption_2.append(captioning(sd_image_b))
720
  else:
 
724
  #sd_image_c.resize((height,width), Image.LANCZOS)
725
  sd_image_c.resize((768,768), Image.LANCZOS)
726
  #caption.append(captioner(sd_image_c))
727
+ cap = captioner2(sd_image_c)
728
+ caption.append(cap)
729
+ print(cap)
730
  #caption.append(captioner_3(sd_image_c))
731
  caption_2.append(captioning(sd_image_c))
732
  else:
 
736
  #sd_image_d.resize((height,width), Image.LANCZOS)
737
  sd_image_d.resize((768,768), Image.LANCZOS)
738
  #caption.append(captioner(sd_image_d))
739
+ cap = captioner2(sd_image_d)
740
+ caption.append(cap)
741
+ print(cap)
742
  #caption.append(captioner_3(sd_image_d))
743
  caption_2.append(captioning(sd_image_d))
744
  else:
 
748
  #sd_image_e.resize((height,width), Image.LANCZOS)
749
  sd_image_e.resize((768,768), Image.LANCZOS)
750
  #caption.append(captioner(sd_image_e))
751
+ cap = captioner2(sd_image_e)
752
+ caption.append(cap)
753
+ print(cap)
754
  #caption.append(captioner_3(sd_image_e))
755
  caption_2.append(captioning(sd_image_e))
756
  else:
 
779
  print(new_prompt)
780
  print("-- FINAL PROMPT --")
781
  print("-- ------------ --")
782
+ global model
783
+ global txt_tokenizer
784
+ del model
785
+ del txt_tokenizer
 
786
  gc.collect()
787
  torch.cuda.empty_cache()
788
  global text_encoder_1