codeShare committed on
Commit b478d5c · verified · 1 Parent(s): e8d8219

Upload sd_token_similarity_calculator.ipynb

Files changed (1)
  1. sd_token_similarity_calculator.ipynb +435 -538
sd_token_similarity_calculator.ipynb CHANGED
@@ -115,27 +115,49 @@
115
  " tensAB[f'{nA + int(key)}'] = tensB[key]\n",
116
  " #-----#\n",
117
  " return dictAB, tensAB , nAB-1\n",
118
- "#-------#\n"
119
  ],
120
  "metadata": {
121
- "id": "rUXQ73IbonHY"
122
  },
123
- "execution_count": 54,
124
- "outputs": []
125
  },
126
  {
127
  "cell_type": "code",
128
  "source": [
129
  "# @title ✳️ Select items for the vocab\n",
130
- "prompt_features = True # @param {\"type\":\"boolean\",\"placeholder\":\"🦜\"}\n",
131
  "suffix = True # @param {\"type\":\"boolean\",\"placeholder\":\"🔹\"}\n",
132
- "prefix = True # @param {\"type\":\"boolean\",\"placeholder\":\"🔸\"}\n",
133
  "debug = False\n",
134
  "\n",
135
- "#🔸🔹\n",
136
- "%cd /content/\n",
137
- "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n",
138
- "\n",
139
  "#------#\n",
140
  "prompts = {}\n",
141
  "text_encodings = {}\n",
@@ -150,6 +172,14 @@
150
  " print(text_encodings[f'{nA}'])\n",
151
  "#--------#\n",
152
  "\n",
153
  "if suffix :\n",
154
  " tmp = '/content/text-to-image-prompts/tokens/suffix/'\n",
155
  " for item in ['common','average','rare','weird','exotic'] :\n",
@@ -183,121 +213,47 @@
183
  ],
184
  "metadata": {
185
  "id": "ZMG4CThUAmwW",
186
- "outputId": "c34b6fcc-e4c5-4fa1-a55b-d8175ccb8f67",
187
  "colab": {
188
  "base_uri": "https://localhost:8080/"
189
  }
190
  },
191
- "execution_count": 55,
192
  "outputs": [
193
  {
194
  "output_type": "stream",
195
  "name": "stdout",
196
  "text": [
197
- "/content\n",
198
- "fatal: destination path 'text-to-image-prompts' already exists and is not an empty directory.\n",
199
- "reading 🦜 fusion-t2i-prompt-features-23.json....\n",
200
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
201
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
202
- "reading 🦜 fusion-t2i-prompt-features-9.json....\n",
203
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
204
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
205
- "reading 🦜 fusion-t2i-prompt-features-4.json....\n",
206
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
207
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
208
- "reading 🦜 fusion-t2i-prompt-features-2.json....\n",
209
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
210
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
211
- "reading 🦜 fusion-t2i-prompt-features-25.json....\n",
212
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
213
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
214
- "reading 🦜 fusion-t2i-prompt-features-6.json....\n",
215
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
216
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
217
- "reading 🦜 fusion-t2i-prompt-features-19.json....\n",
218
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
219
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
220
- "reading 🦜 fusion-t2i-prompt-features-33.json....\n",
221
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
222
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
223
- "reading 🦜 fusion-t2i-prompt-features-18.json....\n",
224
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
225
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
226
- "reading 🦜 fusion-t2i-prompt-features-22.json....\n",
227
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
228
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
229
- "reading 🦜 fusion-t2i-prompt-features-31.json....\n",
230
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
231
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
232
- "reading 🦜 fusion-t2i-prompt-features-3.json....\n",
233
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
234
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
235
- "reading 🦜 fusion-t2i-prompt-features-8.json....\n",
236
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
237
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
238
- "reading 🦜 fusion-t2i-prompt-features-7.json....\n",
239
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
240
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
241
- "reading 🦜 fusion-t2i-prompt-features-13.json....\n",
242
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
243
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
244
- "reading 🦜 fusion-t2i-prompt-features-34.json....\n",
245
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
246
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
247
- "reading 🦜 fusion-t2i-prompt-features-14.json....\n",
248
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
249
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
250
- "reading 🦜 fusion-t2i-prompt-features-12.json....\n",
251
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
252
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
253
- "reading 🦜 fusion-t2i-prompt-features-17.json....\n",
254
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
255
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
256
- "reading 🦜 fusion-t2i-prompt-features-21.json....\n",
257
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
258
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
259
- "reading 🦜 fusion-t2i-prompt-features-24.json....\n",
260
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
261
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
262
- "reading 🦜 fusion-t2i-prompt-features-32.json....\n",
263
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
264
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
265
- "reading 🦜 fusion-t2i-prompt-features-5.json....\n",
266
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
267
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
268
- "reading 🦜 fusion-t2i-prompt-features-20.json....\n",
269
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
270
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
271
- "reading 🦜 fusion-t2i-prompt-features-16.json....\n",
272
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
273
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
274
- "reading 🦜 fusion-t2i-prompt-features-28.json....\n",
275
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
276
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
277
- "reading 🦜 fusion-t2i-prompt-features-10.json....\n",
278
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
279
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
280
- "reading 🦜 fusion-t2i-prompt-features-15.json....\n",
281
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
282
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
283
- "reading 🦜 fusion-t2i-prompt-features-26.json....\n",
284
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
285
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
286
- "reading 🦜 fusion-t2i-prompt-features-27.json....\n",
287
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
288
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
289
- "reading 🦜 fusion-t2i-prompt-features-1.json....\n",
290
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
291
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
292
- "reading 🦜 fusion-t2i-prompt-features-29.json....\n",
293
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
294
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
295
- "reading 🦜 fusion-t2i-prompt-features-30.json....\n",
296
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
297
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
298
- "reading 🦜 fusion-t2i-prompt-features-11.json....\n",
299
- "/content/text-to-image-prompts/civitai-prompts/green/text\n",
300
- "/content/text-to-image-prompts/civitai-prompts/green/text_encodings\n",
301
  "reading 🔹 fusion-t2i-sd15-clip-tokens-common-suffix-5 Tokens.json....\n",
302
  "/content/text-to-image-prompts/tokens/suffix/common/text\n",
303
  "/content/text-to-image-prompts/tokens/suffix/common/text_encodings\n",
@@ -399,52 +355,7 @@
399
  "/content/text-to-image-prompts/tokens/suffix/exotic/text_encodings\n",
400
  "reading 🔹 fusion-t2i-sd15-clip-tokens-exotic-suffix-5 Tokens.json....\n",
401
  "/content/text-to-image-prompts/tokens/suffix/exotic/text\n",
402
- "/content/text-to-image-prompts/tokens/suffix/exotic/text_encodings\n",
403
- "reading 🔸 fusion-t2i-sd15-clip-tokens-common-prefix-1 Tokens.json....\n",
404
- "/content/text-to-image-prompts/tokens/prefix/common/text\n",
405
- "/content/text-to-image-prompts/tokens/prefix/common/text_encodings\n",
406
- "reading 🔸 fusion-t2i-sd15-clip-tokens-common-prefix-3 Tokens.json....\n",
407
- "/content/text-to-image-prompts/tokens/prefix/common/text\n",
408
- "/content/text-to-image-prompts/tokens/prefix/common/text_encodings\n",
409
- "reading 🔸 fusion-t2i-sd15-clip-tokens-common-prefix-2 Tokens.json....\n",
410
- "/content/text-to-image-prompts/tokens/prefix/common/text\n",
411
- "/content/text-to-image-prompts/tokens/prefix/common/text_encodings\n",
412
- "reading 🔸 fusion-t2i-sd15-clip-tokens-average-prefix-3 Tokens.json....\n",
413
- "/content/text-to-image-prompts/tokens/prefix/average/text\n",
414
- "/content/text-to-image-prompts/tokens/prefix/average/text_encodings\n",
415
- "reading 🔸 fusion-t2i-sd15-clip-tokens-average-prefix-2 Tokens.json....\n",
416
- "/content/text-to-image-prompts/tokens/prefix/average/text\n",
417
- "/content/text-to-image-prompts/tokens/prefix/average/text_encodings\n",
418
- "reading 🔸 fusion-t2i-sd15-clip-tokens-average-prefix-1 Tokens.json....\n",
419
- "/content/text-to-image-prompts/tokens/prefix/average/text\n",
420
- "/content/text-to-image-prompts/tokens/prefix/average/text_encodings\n",
421
- "reading 🔸 fusion-t2i-sd15-clip-tokens-rare-prefix-1 Tokens.json....\n",
422
- "/content/text-to-image-prompts/tokens/prefix/rare/text\n",
423
- "/content/text-to-image-prompts/tokens/prefix/rare/text_encodings\n",
424
- "reading 🔸 fusion-t2i-sd15-clip-tokens-rare-prefix-3 Tokens.json....\n",
425
- "/content/text-to-image-prompts/tokens/prefix/rare/text\n",
426
- "/content/text-to-image-prompts/tokens/prefix/rare/text_encodings\n",
427
- "reading 🔸 fusion-t2i-sd15-clip-tokens-rare-prefix-2 Tokens.json....\n",
428
- "/content/text-to-image-prompts/tokens/prefix/rare/text\n",
429
- "/content/text-to-image-prompts/tokens/prefix/rare/text_encodings\n",
430
- "reading 🔸 fusion-t2i-sd15-clip-tokens-weird-prefix-1 Tokens.json....\n",
431
- "/content/text-to-image-prompts/tokens/prefix/weird/text\n",
432
- "/content/text-to-image-prompts/tokens/prefix/weird/text_encodings\n",
433
- "reading 🔸 fusion-t2i-sd15-clip-tokens-weird-prefix-3 Tokens.json....\n",
434
- "/content/text-to-image-prompts/tokens/prefix/weird/text\n",
435
- "/content/text-to-image-prompts/tokens/prefix/weird/text_encodings\n",
436
- "reading 🔸 fusion-t2i-sd15-clip-tokens-weird-prefix-2 Tokens.json....\n",
437
- "/content/text-to-image-prompts/tokens/prefix/weird/text\n",
438
- "/content/text-to-image-prompts/tokens/prefix/weird/text_encodings\n",
439
- "reading 🔸 fusion-t2i-sd15-clip-tokens-exotic-prefix-1 Tokens.json....\n",
440
- "/content/text-to-image-prompts/tokens/prefix/exotic/text\n",
441
- "/content/text-to-image-prompts/tokens/prefix/exotic/text_encodings\n",
442
- "reading 🔸 fusion-t2i-sd15-clip-tokens-exotic-prefix-2 Tokens.json....\n",
443
- "/content/text-to-image-prompts/tokens/prefix/exotic/text\n",
444
- "/content/text-to-image-prompts/tokens/prefix/exotic/text_encodings\n",
445
- "reading 🔸 fusion-t2i-sd15-clip-tokens-exotic-prefix-3 Tokens.json....\n",
446
- "/content/text-to-image-prompts/tokens/prefix/exotic/text\n",
447
- "/content/text-to-image-prompts/tokens/prefix/exotic/text_encodings\n"
448
  ]
449
  }
450
  ]
@@ -480,7 +391,7 @@
480
  "metadata": {
481
  "id": "xc-PbIYF428y"
482
  },
483
- "execution_count": 56,
484
  "outputs": []
485
  },
486
  {
@@ -529,117 +440,117 @@
529
  ],
530
  "metadata": {
531
  "id": "_vnVbxcFf7WV",
532
- "outputId": "b9c6a86b-1239-410b-e240-aca1c8301249",
533
  "colab": {
534
  "base_uri": "https://localhost:8080/"
535
  }
536
  },
537
- "execution_count": 50,
538
  "outputs": [
539
  {
540
  "output_type": "stream",
541
  "name": "stdout",
542
  "text": [
543
- "{her car behind on the road</w>|\n",
544
- "the race car driver sits behind the wheel</w>|\n",
545
- "speed-|\n",
546
- "driving-|\n",
547
- "highway-|\n",
548
- "road-|\n",
549
- "fast-|\n",
550
- "drive-|\n",
551
- "racing car in the background</w>|\n",
552
- "traffic-|\n",
553
- "driver-|\n",
554
- "thats-|\n",
555
- "roadto-|\n",
556
- "slow-|\n",
557
- "cause-|\n",
558
- "cruise-|\n",
559
- "cyber car looking_upscenery</w>|\n",
560
- "cyber car looking_upscenery</w>|\n",
561
- "travel-|\n",
562
- "truck-|\n",
563
- "car-|\n",
564
- "that-|\n",
565
- "destination-|\n",
566
- "way-|\n",
567
- "a beautiful</w>|\n",
568
- "a beautiful</w>|\n",
569
- "just-|\n",
570
- "grat-|\n",
571
- "rolling-|\n",
572
- "simply-|\n",
573
- "cruis-|\n",
574
  "speed</w>|\n",
575
- "something-|\n",
576
- "cool-|\n",
577
  "roadtrip</w>|\n",
578
- "onlyin-|\n",
579
- "random-|\n",
580
- "because-|\n",
581
- "carr-|\n",
582
- "shre-|\n",
583
  "driving</w>|\n",
584
- "funny-|\n",
585
- "street-|\n",
586
- "dding-|\n",
587
- "aand-|\n",
588
  "ontheroad</w>|\n",
589
- "classic-|\n",
590
- "beauti-|\n",
591
- "quick-|\n",
592
- "reall-|\n",
593
- "wast-|\n",
594
- "wait-|\n",
595
- "on the crowded streets</w>|\n",
596
- "haun-|\n",
597
- "epic-|\n",
598
- "lest-|\n",
599
  "faster</w>|\n",
600
- "stoo-|\n",
601
- "quality-|\n",
602
- "fantastic-|\n",
603
- "seem-|\n",
604
- "love-|\n",
605
- "yeah-|\n",
606
- "well-|\n",
607
- "moving-|\n",
608
- "nice-|\n",
609
- "late-|\n",
610
- "camer-|\n",
611
- "sure-|\n",
612
- "only-|\n",
613
- "chasing-|\n",
614
- "loving-|\n",
615
- "the image should have</w>|\n",
616
- "best-|\n",
617
- "reali-|\n",
618
- "takes off down the track</w>|\n",
619
- "and you can't help but feel</w>|\n",
620
- "dazz-|\n",
621
- "followthe-|\n",
622
- "awesome-|\n",
623
- "words-|\n",
624
- "lifein-|\n",
625
- "eng-|\n",
626
- "herit-|\n",
627
- "oneof-|\n",
628
- "but now seems to exude</w>|\n",
629
- "then-|\n",
630
- "beast-|\n",
631
- "thisis-|\n",
632
- "goodday-|\n",
633
- "thisi-|\n",
634
- "right-|\n",
635
- "is appropriate for the time period</w>|\n",
636
- "amazing-|\n",
637
- "race-|\n",
638
- "madd-|\n",
639
- "coming-|\n",
640
- "zoom-|\n",
641
- "picture-|\n",
642
- "ight-}\n"
 
 
643
  ]
644
  }
645
  ]
@@ -700,13 +611,13 @@
700
  ],
701
  "metadata": {
702
  "id": "ke6mZ1RZDOeB",
703
- "outputId": "53b9280f-b8ba-4036-c720-e679bd0a162c",
704
  "colab": {
705
  "base_uri": "https://localhost:8080/",
706
  "height": 1000
707
  }
708
  },
709
- "execution_count": 51,
710
  "outputs": [
711
  {
712
  "output_type": "display_data",
@@ -750,7 +661,7 @@
750
  "metadata": {
751
  "id": "rebogpoyOG8k"
752
  },
753
- "execution_count": 52,
754
  "outputs": []
755
  },
756
  {
@@ -758,7 +669,7 @@
758
  "source": [
759
  "# @title 🖼️ Print the results\n",
760
  "list_size = 100 # @param {type:'number'}\n",
761
- "start_at_index = 0 # @param {type:'number'}\n",
762
  "print_Similarity = True # @param {type:\"boolean\"}\n",
763
  "print_Prompts = True # @param {type:\"boolean\"}\n",
764
  "print_Prefix = True # @param {type:\"boolean\"}\n",
@@ -802,271 +713,118 @@
802
  "colab": {
803
  "base_uri": "https://localhost:8080/"
804
  },
805
- "outputId": "68e866ad-9cc7-43f6-ad67-d72c28fa0b46"
806
  },
807
- "execution_count": 53,
808
  "outputs": [
809
  {
810
  "output_type": "stream",
811
  "name": "stdout",
812
  "text": [
813
- "{beautiful avatar pictures</w>|\n",
814
- "purple hair crowned standing in storm background</w>|\n",
815
- "warcraft</w>|\n",
816
- "beautiful celebrity futuristic sci-fi</w>|\n",
817
- "by magali villeneuve</w>|\n",
818
- "visually striking spectacle inspired by the works</w>|\n",
819
- "visually striking spectacle inspired by the works</w>|\n",
820
- "a beautiful female warrior</w>|\n",
821
- "a sexy scifi warrior</w>|\n",
822
- "a sexy scifi warrior</w>|\n",
823
- "film still from halo live action adaptation</w>|\n",
824
- "cinematic film still from captain marvel</w>|\n",
825
- "beautiful female warrior</w>|\n",
826
- "beautiful female warrior</w>|\n",
827
- "film still from halo live-action movie adaptation</w>|\n",
828
- "outlandish costume design</w>|\n",
829
- "defend-|\n",
830
- "beautiful indian warrior queen</w>|\n",
831
- "genie</w>|\n",
832
- "of female space soldier</w>|\n",
833
- "vfx</w>|\n",
834
- "blue light on her face she appears calm</w>|\n",
835
- "a female scifi warrior</w>|\n",
836
- "a female scifi warrior</w>|\n",
837
- "orian</w>|\n",
838
- "nebula in her streak hair</w>|\n",
839
- "wall-|\n",
840
- "of brown skinned indian warrior queen</w>|\n",
841
- "played by young dove cameron</w>|\n",
842
- "thora</w>|\n",
843
- "has runes on her body</w>|\n",
844
- "has runes on her body</w>|\n",
845
- "beautiful light makeup female sorceress</w>|\n",
846
- "a gorgeous female void thrall</w>|\n",
847
- "a gorgeous female void thrall</w>|\n",
848
- "beautiful female elf queen</w>|\n",
849
- "captivating mystique</w>|\n",
850
- "captivating mystique</w>|\n",
851
- "symbolizing her role as the goddess</w>|\n",
852
- "blu-|\n",
853
- "character integrated into the background</w>|\n",
854
- "swirling black light around the character</w>|\n",
855
- "very beautiful jean grey wearing</w>|\n",
856
- "lightly blued metal armor</w>|\n",
857
- "multiple different characters in the background</w>|\n",
858
- "raid</w>|\n",
859
- "tributes</w>|\n",
860
- "cinematic still from conan</w>|\n",
861
- "yo person as dark elf queen</w>|\n",
862
- "descendants</w>|\n",
863
- "trending at cgsociety</w>|\n",
864
- "exodus</w>|\n",
865
- "scarlett</w>|\n",
866
- "femaleastronaut exalted human futuristic warrior</w>|\n",
867
- "femaleastronaut exalted human futuristic warrior</w>|\n",
868
- "indigo</w>|\n",
869
- "epic fantasy greek priestess</w>|\n",
870
- "epic fantasy greek priestess</w>|\n",
871
- "female draenei world</w>|\n",
872
- "genetically engineered soldiers</w>|\n",
873
- "genetically engineered soldiers</w>|\n",
874
- "visually striking scene the lighting</w>|\n",
875
- "the female soldier marches in formation</w>|\n",
876
- "the female soldier marches in formation</w>|\n",
877
- "revealing costume design</w>|\n",
878
- "pandora_smith_magister</w>|\n",
879
- "pandora_smith_magister</w>|\n",
880
- "his sorceress in the back ground</w>|\n",
881
- "tali</w>|\n",
882
- "gorgeous muscular elven ukrainian</w>|\n",
883
- "gorgeous muscular elven ukrainian</w>|\n",
884
- "womenshistorymonth</w>|\n",
885
- "water elemental officer jenny</w>|\n",
886
- "ontari-|\n",
887
- "bleedblue</w>|\n",
888
- "norse female goddess</w>|\n",
889
- "matte fantasy painting</w>|\n",
890
- "thanos</w>|\n",
891
- "periwinkle purple skin</w>|\n",
892
- "xmen</w>|\n",
893
- "of norse female goddess</w>|\n",
894
- "mujer de ojos rojos y pelo azulado</w>|\n",
895
- "strength the battle scene around her</w>|\n",
896
- "a beautiful young redhead warrior</w>|\n",
897
- "a beautiful young redhead warrior</w>|\n",
898
- "moody cinematic epic concept art</w>|\n",
899
- "hypnotically beautiful wood elf in</w>|\n",
900
- "hypnotically beautiful wood elf in</w>|\n",
901
- "loraemmawatsonlora_v</w>|\n",
902
- "alphonse mucha cinematic epic + rule</w>|\n",
903
- "alphonse mucha cinematic epic + rule</w>|\n",
904
- "an actress standing behind</w>|\n",
905
- "fking_scifi_v amazing</w>|\n",
906
- "shine like sapphires</w>|\n",
907
- "defence</w>|\n",
908
- "female elemental water wizard</w>|\n",
909
- "female elemental water wizard</w>|\n",
910
- "beautiful character design</w>|\n",
911
- "horde</w>|\n",
912
- "female warriors protecting an underwater temple</w>}\n"
913
  ]
914
  }
915
  ]
916
  },
917
- {
918
- "cell_type": "code",
919
- "source": [
920
- "# @title ⚡ Get similiar tokens (not updated yet)\n",
921
- "import torch\n",
922
- "from transformers import AutoTokenizer\n",
923
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
924
- "\n",
925
- "# @markdown Write name of token to match against\n",
926
- "token_name = \"banana \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
927
- "\n",
928
- "prompt = token_name\n",
929
- "# @markdown (optional) Mix the token with something else\n",
930
- "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
931
- "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
932
- "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
933
- "# @markdown Limit char size of included token\n",
934
- "\n",
935
- "min_char_size = 0 # param {type:\"slider\", min:0, max: 50, step:1}\n",
936
- "char_range = 50 # param {type:\"slider\", min:0, max: 50, step:1}\n",
937
- "\n",
938
- "tokenizer_output = tokenizer(text = prompt)\n",
939
- "input_ids = tokenizer_output['input_ids']\n",
940
- "id_A = input_ids[1]\n",
941
- "A = torch.tensor(token[id_A])\n",
942
- "A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
943
- "#-----#\n",
944
- "tokenizer_output = tokenizer(text = mix_with)\n",
945
- "input_ids = tokenizer_output['input_ids']\n",
946
- "id_C = input_ids[1]\n",
947
- "C = torch.tensor(token[id_C])\n",
948
- "C = C/C.norm(p=2, dim=-1, keepdim=True)\n",
949
- "#-----#\n",
950
- "sim_AC = torch.dot(A,C)\n",
951
- "#-----#\n",
952
- "print(input_ids)\n",
953
- "#-----#\n",
954
- "\n",
955
- "#if no imput exists we just randomize the entire thing\n",
956
- "if (prompt == \"\"):\n",
957
- " id_A = -1\n",
958
- " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n",
959
- " R = torch.rand(A.shape)\n",
960
- " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
961
- " A = R\n",
962
- " name_A = 'random_A'\n",
963
- "\n",
964
- "#if no imput exists we just randomize the entire thing\n",
965
- "if (mix_with == \"\"):\n",
966
- " id_C = -1\n",
967
- " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n",
968
- " R = torch.rand(A.shape)\n",
969
- " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
970
- " C = R\n",
971
- " name_C = 'random_C'\n",
972
- "\n",
973
- "name_A = \"A of random type\"\n",
974
- "if (id_A>-1):\n",
975
- " name_A = vocab(id_A)\n",
976
- "\n",
977
- "name_C = \"token C of random type\"\n",
978
- "if (id_C>-1):\n",
979
- " name_C = vocab(id_C)\n",
980
- "\n",
981
- "print(f\"The similarity between A '{name_A}' and C '{name_C}' is {round(sim_AC.item()*100,2)} %\")\n",
982
- "\n",
983
- "if (mix_method == \"None\"):\n",
984
- " print(\"No operation\")\n",
985
- "\n",
986
- "if (mix_method == \"Average\"):\n",
987
- " A = w*A + (1-w)*C\n",
988
- " _A = LA.vector_norm(A, ord=2)\n",
989
- " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n",
990
- "\n",
991
- "if (mix_method == \"Subtract\"):\n",
992
- " tmp = w*A - (1-w)*C\n",
993
- " tmp = tmp/tmp.norm(p=2, dim=-1, keepdim=True)\n",
994
- " A = tmp\n",
995
- " #//---//\n",
996
- " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n",
997
- "\n",
998
- "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n",
999
- "\n",
1000
- "dots = torch.zeros(NUM_TOKENS)\n",
1001
- "for index in range(NUM_TOKENS):\n",
1002
- " id_B = index\n",
1003
- " B = torch.tensor(token[id_B])\n",
1004
- " B = B/B.norm(p=2, dim=-1, keepdim=True)\n",
1005
- " sim_AB = torch.dot(A,B)\n",
1006
- " dots[index] = sim_AB\n",
1007
- "\n",
1008
- "\n",
1009
- "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
1010
- "#----#\n",
1011
- "if (mix_method == \"Average\"):\n",
1012
- " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1013
- "if (mix_method == \"Subtract\"):\n",
1014
- " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1015
- "if (mix_method == \"None\"):\n",
1016
- " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
1017
- "\n",
1018
- "#Produce a list id IDs that are most similiar to the prompt ID at positiion 1 based on above result\n",
1019
- "\n",
1020
- "# @markdown Set print options\n",
1021
- "list_size = 100 # @param {type:'number'}\n",
1022
- "print_ID = False # @param {type:\"boolean\"}\n",
1023
- "print_Similarity = True # @param {type:\"boolean\"}\n",
1024
- "print_Name = True # @param {type:\"boolean\"}\n",
1025
- "print_Divider = True # @param {type:\"boolean\"}\n",
1026
- "\n",
1027
- "\n",
1028
- "if (print_Divider):\n",
1029
- " print('//---//')\n",
1030
- "\n",
1031
- "print('')\n",
1032
- "print('Here is the result : ')\n",
1033
- "print('')\n",
1034
- "\n",
1035
- "for index in range(list_size):\n",
1036
- " id = indices[index].item()\n",
1037
- " if (print_Name):\n",
1038
- " print(f'{vocab(id)}') # vocab item\n",
1039
- " if (print_ID):\n",
1040
- " print(f'ID = {id}') # IDs\n",
1041
- " if (print_Similarity):\n",
1042
- " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
1043
- " if (print_Divider):\n",
1044
- " print('--------')\n",
1045
- "\n",
1046
- "#Print the sorted list from above result\n",
1047
- "\n",
1048
- "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
1049
- "\n",
1050
- "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID.\n",
1051
- "\n",
1052
- "# Save results as .db file\n",
1053
- "import shelve\n",
1054
- "VOCAB_FILENAME = 'tokens_most_similiar_to_' + name_A.replace('</w>','').strip()\n",
1055
- "d = shelve.open(VOCAB_FILENAME)\n",
1056
- "#NUM TOKENS == 49407\n",
1057
- "for index in range(NUM_TOKENS):\n",
1058
- " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
1059
- " d[f'{index}']= vocab(indices[index].item()) #<---- write values to .db file\n",
1060
- "#----#\n",
1061
- "d.close() #close the file\n",
1062
- "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
1063
- ],
1064
- "metadata": {
1065
- "id": "iWeFnT1gAx6A"
1066
- },
1067
- "execution_count": null,
1068
- "outputs": []
1069
- },
1070
  {
1071
  "cell_type": "code",
1072
  "source": [
@@ -1383,20 +1141,6 @@
1383
  "execution_count": null,
1384
  "outputs": []
1385
  },
1386
- {
1387
- "cell_type": "code",
1388
- "source": [
1389
- "# @title (Optional) ⚡Actively set which Vocab list to use for the interrogator\n",
1390
- "token_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a token_name used earlier\"}\n",
1391
- "VOCAB_FILENAME = 'tokens_most_similiar_to_' + token_name.replace('</w>','').strip()\n",
1392
- "print(f'Using a vocab ordered to most similiar to the token {token_name}')"
1393
- ],
1394
- "metadata": {
1395
- "id": "FYa96UCQuE1U"
1396
- },
1397
- "execution_count": null,
1398
- "outputs": []
1399
- },
1400
  {
1401
  "cell_type": "code",
1402
  "source": [
@@ -1436,6 +1180,159 @@
1436
  "execution_count": null,
1437
  "outputs": []
1438
  },
 
 
 
1439
  {
1440
  "cell_type": "markdown",
1441
  "source": [
@@ -1485,16 +1382,16 @@
1485
  "my_mkdirs('/content/text_encodings/')\n",
1486
  "filename = ''\n",
1487
  "\n",
1488
- "NUM_FILES = 7\n",
1489
  "\n",
1490
  "for file_index in range(NUM_FILES + 1):\n",
1491
  " if file_index <1: continue\n",
1492
  " #if file_index >4: break\n",
1493
- " filename = f'🔹 fusion-t2i-sd15-clip-tokens-exotic-suffix-{file_index} Tokens'\n",
1494
  " #🦜 fusion-t2i-prompt-features-1.json\n",
1495
  "\n",
1496
  " # Read suffix.json\n",
1497
- " %cd /content/text-to-image-prompts/tokens/suffix/exotic/text\n",
1498
  " with open(filename + '.json', 'r') as f:\n",
1499
  " data = json.load(f)\n",
1500
  " _df = pd.DataFrame({'count': data})['count']\n",
@@ -1530,9 +1427,9 @@
1530
  {
1531
  "cell_type": "code",
1532
  "source": [
1533
- "# @title Download the created text_encodings as .zip file\n",
1534
  "%cd /content/\n",
1535
- "!zip -r /content/tokens.zip /content/text-to-image-prompts/tokens"
1536
  ],
1537
  "metadata": {
1538
  "id": "gX-sHZPWj4Lt"
 
115
  " tensAB[f'{nA + int(key)}'] = tensB[key]\n",
116
  " #-----#\n",
117
  " return dictAB, tensAB , nAB-1\n",
118
+ "#-------#\n",
119
+ "\n",
120
+ "#🔸🔹\n",
121
+ "%cd /content/\n",
122
+ "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n"
123
  ],
124
  "metadata": {
125
+ "id": "rUXQ73IbonHY",
126
+ "outputId": "9e40d8a1-fbb3-4200-fc80-3d6f32d3667a",
127
+ "colab": {
128
+ "base_uri": "https://localhost:8080/"
129
+ }
130
  },
131
+ "execution_count": 1,
132
+ "outputs": [
133
+ {
134
+ "output_type": "stream",
135
+ "name": "stdout",
136
+ "text": [
137
+ "/content\n",
138
+ "Cloning into 'text-to-image-prompts'...\n",
139
+ "remote: Enumerating objects: 450, done.\u001b[K\n",
140
+ "remote: Counting objects: 100% (447/447), done.\u001b[K\n",
141
+ "remote: Compressing objects: 100% (428/428), done.\u001b[K\n",
142
+ "remote: Total 450 (delta 81), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
143
+ "Receiving objects: 100% (450/450), 998.98 KiB | 3.92 MiB/s, done.\n",
144
+ "Resolving deltas: 100% (81/81), done.\n",
145
+ "Filtering content: 100% (95/95), 305.98 MiB | 41.88 MiB/s, done.\n"
146
+ ]
147
+ }
148
+ ]
149
  },
150
  {
151
  "cell_type": "code",
152
  "source": [
153
  "# @title ✳️ Select items for the vocab\n",
154
+ "\n",
155
+ "prompt_features = False # @param {\"type\":\"boolean\",\"placeholder\":\"🦜\"}\n",
156
+ "civitai_blue_set = True # @param {\"type\":\"boolean\",\"placeholder\":\"📘\"}\n",
157
  "suffix = True # @param {\"type\":\"boolean\",\"placeholder\":\"🔹\"}\n",
158
+ "prefix = False # @param {\"type\":\"boolean\",\"placeholder\":\"🔸\"}\n",
159
  "debug = False\n",
160
  "\n",
161
  "#------#\n",
162
  "prompts = {}\n",
163
  "text_encodings = {}\n",
 
172
  " print(text_encodings[f'{nA}'])\n",
173
  "#--------#\n",
174
  "\n",
175
+ "if civitai_blue_set:\n",
176
+ " url = '/content/text-to-image-prompts/civitai-prompts/blue'\n",
177
+ " prompts , text_encodings, nA = append_from_url(prompts , text_encodings, nA , url , '')\n",
178
+ " if debug:\n",
179
+ " print(prompts[f'{nA}'])\n",
180
+ " print(text_encodings[f'{nA}'])\n",
181
+ "#--------#\n",
182
+ "\n",
183
  "if suffix :\n",
184
  " tmp = '/content/text-to-image-prompts/tokens/suffix/'\n",
185
  " for item in ['common','average','rare','weird','exotic'] :\n",
 
213
  ],
214
  "metadata": {
215
  "id": "ZMG4CThUAmwW",
216
+ "outputId": "dfb5a625-72e7-462e-c118-682f0a45ed12",
217
  "colab": {
218
  "base_uri": "https://localhost:8080/"
219
  }
220
  },
221
+ "execution_count": 17,
222
  "outputs": [
223
  {
224
  "output_type": "stream",
225
  "name": "stdout",
226
  "text": [
227
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-2.json....\n",
228
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
229
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
230
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-10.json....\n",
231
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
232
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
233
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-6.json....\n",
234
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
235
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
236
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-5.json....\n",
237
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
238
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
239
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-4.json....\n",
240
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
241
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
242
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-8.json....\n",
243
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
244
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
245
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-1.json....\n",
246
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
247
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
248
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-7.json....\n",
249
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
250
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
251
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-3.json....\n",
252
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
253
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
254
+ "reading 🧿📘 fusion-t2i-civitai-0-20-chars-mix-9.json....\n",
255
+ "/content/text-to-image-prompts/civitai-prompts/blue/text\n",
256
+ "/content/text-to-image-prompts/civitai-prompts/blue/text_encodings\n",
 
 
 
257
  "reading 🔹 fusion-t2i-sd15-clip-tokens-common-suffix-5 Tokens.json....\n",
258
  "/content/text-to-image-prompts/tokens/suffix/common/text\n",
259
  "/content/text-to-image-prompts/tokens/suffix/common/text_encodings\n",
 
355
  "/content/text-to-image-prompts/tokens/suffix/exotic/text_encodings\n",
356
  "reading 🔹 fusion-t2i-sd15-clip-tokens-exotic-suffix-5 Tokens.json....\n",
357
  "/content/text-to-image-prompts/tokens/suffix/exotic/text\n",
358
+ "/content/text-to-image-prompts/tokens/suffix/exotic/text_encodings\n"
 
 
 
359
  ]
360
  }
361
  ]
 
391
  "metadata": {
392
  "id": "xc-PbIYF428y"
393
  },
394
+ "execution_count": 18,
395
  "outputs": []
396
  },
397
  {
 
440
  ],
441
  "metadata": {
442
  "id": "_vnVbxcFf7WV",
443
+ "outputId": "47f6617b-752b-4349-a2bd-46fdae985572",
444
  "colab": {
445
  "base_uri": "https://localhost:8080/"
446
  }
447
  },
448
+ "execution_count": 19,
449
  "outputs": [
450
  {
451
  "output_type": "stream",
452
  "name": "stdout",
453
  "text": [
454
+ "{Sports Car|\n",
455
+ "beautiful car|\n",
456
+ "road nature|\n",
457
+ "running road|\n",
458
+ "sport car petite|\n",
459
+ "it's a gas gas|\n",
460
+ "The Road Not Taken|\n",
461
+ "road Horizon|\n",
462
+ "far away|\n",
 
 
463
  "speed</w>|\n",
464
  "roadtrip</w>|\n",
465
  "driving</w>|\n",
466
+ "true to life|\n",
467
+ "street|\n",
468
  "ontheroad</w>|\n",
469
+ "sharp image|\n",
470
+ "on a race track|\n",
471
+ "road construction|\n",
472
+ "with a soft|\n",
473
+ "a picture|\n",
  "faster</w>|\n",
475
+ "Fantastic|\n",
476
+ "loving|\n",
477
+ "road architecture|\n",
478
+ "day scenery|\n",
479
+ "wonderful|\n",
480
+ "head back|\n",
481
+ "photographic style|\n",
482
+ "alright</w>|\n",
483
+ "thats</w>|\n",
484
+ "awesome inspiring|\n",
485
+ "Know the past|\n",
486
+ "seems</w>|\n",
487
+ "as style|\n",
488
+ "inspired|\n",
489
+ "lovely|\n",
490
+ "well</w>|\n",
491
+ "reminds</w>|\n",
492
+ "beautiful amazing|\n",
493
+ "highway</w>|\n",
494
+ "appears timeless|\n",
495
+ "that</w>|\n",
496
+ "beautiful gorgeous|\n",
497
+ "highway setting|\n",
498
+ "Science fiction|\n",
499
+ "science fiction|\n",
500
+ "speeding</w>|\n",
501
+ "in a mountain land|\n",
502
+ "inspiration|\n",
503
+ "day time|\n",
504
+ "busy highway|\n",
505
+ "really</w>|\n",
506
+ "Phenomenal|\n",
507
+ "girl trembling|\n",
508
+ "beauty|\n",
509
+ "baby|\n",
510
+ "top quality|\n",
511
+ "motorcycle freeway|\n",
512
+ "very beautiful|\n",
513
+ "cute beautiful|\n",
514
+ "beautiful|\n",
515
+ "beautiful|\n",
516
+ "Beautiful|\n",
517
+ "tweeted</w>|\n",
518
+ "street in city|\n",
519
+ "exciting|\n",
520
+ "fire flames|\n",
521
+ "Memory|\n",
522
+ "Riding|\n",
523
+ "in first place|\n",
524
+ "a spaceship|\n",
525
+ "automobile</w>|\n",
526
+ "emotional|\n",
527
+ "retweeted</w>|\n",
528
+ "handsome|\n",
529
+ "car</w>|\n",
530
+ "artistic cool|\n",
531
+ "it is Furious|\n",
532
+ "stanning|\n",
533
+ "trembling|\n",
534
+ "cool amazing|\n",
535
+ "smooth|\n",
536
+ "this</w>|\n",
537
+ "countryside|\n",
538
+ "dynamic movement|\n",
539
+ "beautiful elegant|\n",
540
+ "stunning|\n",
541
+ "ethereal fantastic|\n",
542
+ "gorgeous inspired|\n",
543
+ "beautiful hot|\n",
544
+ "street elegant|\n",
545
+ "heres</w>|\n",
546
+ "A stylish|\n",
547
+ "at_day|\n",
548
+ "evocative image|\n",
549
+ "hysterical|\n",
550
+ "dreamlike|\n",
551
+ ". cute adorable|\n",
552
+ "Exquisite|\n",
553
+ "gorgeous}\n"
554
  ]
555
  }
556
  ]
 
611
  ],
612
  "metadata": {
613
  "id": "ke6mZ1RZDOeB",
614
+ "outputId": "d8ef4589-8393-4001-ff35-c0c30646a576",
615
  "colab": {
616
  "base_uri": "https://localhost:8080/",
617
  "height": 1000
618
  }
619
  },
620
+ "execution_count": 14,
621
  "outputs": [
622
  {
623
  "output_type": "display_data",
 
661
  "metadata": {
662
  "id": "rebogpoyOG8k"
663
  },
664
+ "execution_count": 15,
665
  "outputs": []
666
  },
667
  {
 
669
  "source": [
670
  "# @title 🖼️ Print the results\n",
671
  "list_size = 100 # @param {type:'number'}\n",
672
+ "start_at_index = 100 # @param {type:'number'}\n",
673
  "print_Similarity = True # @param {type:\"boolean\"}\n",
674
  "print_Prompts = True # @param {type:\"boolean\"}\n",
675
  "print_Prefix = True # @param {type:\"boolean\"}\n",
 
713
  "colab": {
714
  "base_uri": "https://localhost:8080/"
715
  },
716
+ "outputId": "2271de2f-6885-4f72-bcd0-3c39a9cfaada"
717
  },
718
+ "execution_count": 16,
719
  "outputs": [
720
  {
721
  "output_type": "stream",
722
  "name": "stdout",
723
  "text": [
724
+ "{reimagined</w>|\n",
725
+ "movie</w>|\n",
726
+ "4k vivid colors|\n",
727
+ "movie still|\n",
728
+ "Movie still|\n",
729
+ "heroine</w>|\n",
730
+ "amazon</w>|\n",
731
+ "taun-|\n",
732
+ "alliance</w>|\n",
733
+ "reminis-|\n",
734
+ "premiere</w>|\n",
735
+ "honor-|\n",
736
+ "artemis</w>|\n",
737
+ "blue archive|\n",
738
+ "guarding</w>|\n",
739
+ "purple-|\n",
740
+ "protectors</w>|\n",
741
+ "Concept art|\n",
742
+ "concept art|\n",
743
+ "mags</w>|\n",
744
+ "cinematic still|\n",
745
+ "Cinematic still|\n",
746
+ "epic fantasy|\n",
747
+ "athena</w>|\n",
748
+ "ragnarok</w>|\n",
749
+ "bloo-|\n",
750
+ "special effects|\n",
751
+ "rained</w>|\n",
752
+ "vibrant arthouse|\n",
753
+ "clones</w>|\n",
754
+ "cinema art|\n",
755
+ "elves</w>|\n",
756
+ "movie texture|\n",
757
+ "anarch-|\n",
758
+ "oxi-|\n",
759
+ "sura-|\n",
760
+ "widow</w>|\n",
761
+ "vibrant Concept art|\n",
762
+ "goddess</w>|\n",
763
+ "Masterpiece Sci-Fi|\n",
764
+ "recruited</w>|\n",
765
+ "terra</w>|\n",
766
+ "sirens</w>|\n",
767
+ "defiance</w>|\n",
768
+ "sprite</w>|\n",
769
+ "soaked</w>|\n",
770
+ "kavan-|\n",
771
+ "holocau-|\n",
772
+ "soldiers</w>|\n",
773
+ "artstation|\n",
774
+ "valor</w>|\n",
775
+ "etty</w>|\n",
776
+ "marshals</w>|\n",
777
+ "clint</w>|\n",
778
+ "hd 8k masterpiece|\n",
779
+ "bluec-|\n",
780
+ "poppins</w>|\n",
781
+ "deeps darks|\n",
782
+ "hera</w>|\n",
783
+ "marvel 1girl|\n",
784
+ "guardian</w>|\n",
785
+ "references</w>|\n",
786
+ "woman solo|\n",
787
+ "4K 2girl|\n",
788
+ "characters</w>|\n",
789
+ "resolve</w>|\n",
790
+ "hail</w>|\n",
791
+ "sarmy</w>|\n",
792
+ "watched</w>|\n",
793
+ "drow-|\n",
794
+ "absurdres highres|\n",
795
+ "ogue</w>|\n",
796
+ "eq-|\n",
797
+ "snapped</w>|\n",
798
+ "atrix</w>|\n",
799
+ "navis</w>|\n",
800
+ "bodypaint|\n",
801
+ "striking</w>|\n",
802
+ "in that scene|\n",
803
+ "legion-|\n",
804
+ "hue-|\n",
805
+ "empowered</w>|\n",
806
+ "faction</w>|\n",
807
+ "widows</w>|\n",
808
+ "1girl vast|\n",
809
+ "destiny</w>|\n",
810
+ "visually</w>|\n",
811
+ "aspirations</w>|\n",
812
+ "tson</w>|\n",
813
+ "highres ultrares|\n",
814
+ "tali-|\n",
815
+ "swoon</w>|\n",
816
+ "aroo</w>|\n",
817
+ "oxi</w>|\n",
818
+ "blue filter|\n",
819
+ "blue theme|\n",
820
+ "women</w>|\n",
821
+ "orah</w>|\n",
822
+ "backlash</w>|\n",
823
+ "legendof-}\n"
824
  ]
825
  }
826
  ]
827
  },
 
 
 
828
  {
829
  "cell_type": "code",
830
  "source": [
 
1141
  "execution_count": null,
1142
  "outputs": []
1143
  },
 
 
1144
  {
1145
  "cell_type": "code",
1146
  "source": [
 
1180
  "execution_count": null,
1181
  "outputs": []
1182
  },
1183
+ {
1184
+ "cell_type": "code",
1185
+ "source": [
1186
+ "# @title ⚡ Get similar tokens (not updated yet)\n",
1187
+ "import torch\n",
1188
+ "from transformers import AutoTokenizer\n",
1189
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1190
+ "\n",
1191
+ "# @markdown Write name of token to match against\n",
1192
+ "token_name = \"banana \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
1193
+ "\n",
1194
+ "prompt = token_name\n",
1195
+ "# @markdown (optional) Mix the token with something else\n",
1196
+ "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
1197
+ "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
1198
+ "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
1199
+ "# @markdown Limit char size of included token\n",
1200
+ "\n",
1201
+ "min_char_size = 0 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1202
+ "char_range = 50 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1203
+ "\n",
1204
+ "tokenizer_output = tokenizer(text = prompt)\n",
1205
+ "input_ids = tokenizer_output['input_ids']\n",
1206
+ "id_A = input_ids[1]\n",
1207
+ "A = torch.tensor(token[id_A])\n",
1208
+ "A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
1209
+ "#-----#\n",
1210
+ "tokenizer_output = tokenizer(text = mix_with)\n",
1211
+ "input_ids = tokenizer_output['input_ids']\n",
1212
+ "id_C = input_ids[1]\n",
1213
+ "C = torch.tensor(token[id_C])\n",
1214
+ "C = C/C.norm(p=2, dim=-1, keepdim=True)\n",
1215
+ "#-----#\n",
1216
+ "sim_AC = torch.dot(A,C)\n",
1217
+ "#-----#\n",
1218
+ "print(input_ids)\n",
1219
+ "#-----#\n",
1220
+ "\n",
1221
+ "#if no input exists we just randomize the entire thing\n",
1222
+ "if (prompt == \"\"):\n",
1223
+ " id_A = -1\n",
1224
+ " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n",
1225
+ " R = torch.rand(A.shape)\n",
1226
+ " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1227
+ " A = R\n",
1228
+ " name_A = 'random_A'\n",
1229
+ "\n",
1230
+ "#if no input exists we just randomize the entire thing\n",
1231
+ "if (mix_with == \"\"):\n",
1232
+ " id_C = -1\n",
1233
+ " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n",
1234
+ " R = torch.rand(A.shape)\n",
1235
+ " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1236
+ " C = R\n",
1237
+ " name_C = 'random_C'\n",
1238
+ "\n",
1239
+ "name_A = \"A of random type\"\n",
1240
+ "if (id_A>-1):\n",
1241
+ " name_A = vocab(id_A)\n",
1242
+ "\n",
1243
+ "name_C = \"token C of random type\"\n",
1244
+ "if (id_C>-1):\n",
1245
+ " name_C = vocab(id_C)\n",
1246
+ "\n",
1247
+ "print(f\"The similarity between A '{name_A}' and C '{name_C}' is {round(sim_AC.item()*100,2)} %\")\n",
1248
+ "\n",
1249
+ "if (mix_method == \"None\"):\n",
1250
+ " print(\"No operation\")\n",
1251
+ "\n",
1252
+ "if (mix_method == \"Average\"):\n",
1253
+ " A = w*A + (1-w)*C\n",
1254
+ " _A = LA.vector_norm(A, ord=2)\n",
1255
+ " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n",
1256
+ "\n",
1257
+ "if (mix_method == \"Subtract\"):\n",
1258
+ " tmp = w*A - (1-w)*C\n",
1259
+ " tmp = tmp/tmp.norm(p=2, dim=-1, keepdim=True)\n",
1260
+ " A = tmp\n",
1261
+ " #//---//\n",
1262
+ " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n",
1263
+ "\n",
1264
+ "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n",
1265
+ "\n",
1266
+ "dots = torch.zeros(NUM_TOKENS)\n",
1267
+ "for index in range(NUM_TOKENS):\n",
1268
+ " id_B = index\n",
1269
+ " B = torch.tensor(token[id_B])\n",
1270
+ " B = B/B.norm(p=2, dim=-1, keepdim=True)\n",
1271
+ " sim_AB = torch.dot(A,B)\n",
1272
+ " dots[index] = sim_AB\n",
1273
+ "\n",
1274
+ "\n",
1275
+ "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
1276
+ "#----#\n",
1277
+ "if (mix_method == \"Average\"):\n",
1278
+ " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1279
+ "if (mix_method == \"Subtract\"):\n",
1280
+ " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1281
+ "if (mix_method == \"None\"):\n",
1282
+ " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
1283
+ "\n",
1284
+ "#Produce a list of IDs that are most similar to the prompt ID at position 1, based on the above result\n",
1285
+ "\n",
1286
+ "# @markdown Set print options\n",
1287
+ "list_size = 100 # @param {type:'number'}\n",
1288
+ "print_ID = False # @param {type:\"boolean\"}\n",
1289
+ "print_Similarity = True # @param {type:\"boolean\"}\n",
1290
+ "print_Name = True # @param {type:\"boolean\"}\n",
1291
+ "print_Divider = True # @param {type:\"boolean\"}\n",
1292
+ "\n",
1293
+ "\n",
1294
+ "if (print_Divider):\n",
1295
+ " print('//---//')\n",
1296
+ "\n",
1297
+ "print('')\n",
1298
+ "print('Here is the result : ')\n",
1299
+ "print('')\n",
1300
+ "\n",
1301
+ "for index in range(list_size):\n",
1302
+ " id = indices[index].item()\n",
1303
+ " if (print_Name):\n",
1304
+ " print(f'{vocab(id)}') # vocab item\n",
1305
+ " if (print_ID):\n",
1306
+ " print(f'ID = {id}') # IDs\n",
1307
+ " if (print_Similarity):\n",
1308
+ " print(f'similarity = {round(sorted[index].item()*100,2)} %')\n",
1309
+ " if (print_Divider):\n",
1310
+ " print('--------')\n",
1311
+ "\n",
1312
+ "#Print the sorted list from above result\n",
1313
+ "\n",
1314
+ "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
1315
+ "\n",
1316
+ "#You can leave the 'prompt' field empty to get a random-valued tensor. Since the tensor is random-valued, it will not correspond to any tensor in the vocab.json list, and thus it will have no ID.\n",
1317
+ "\n",
1318
+ "# Save results as .db file\n",
1319
+ "import shelve\n",
1320
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_' + name_A.replace('</w>','').strip()\n",
1321
+ "d = shelve.open(VOCAB_FILENAME)\n",
1322
+ "#NUM TOKENS == 49407\n",
1323
+ "for index in range(NUM_TOKENS):\n",
1324
+ " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
1325
+ " d[f'{index}']= vocab(indices[index].item()) #<---- write values to .db file\n",
1326
+ "#----#\n",
1327
+ "d.close() #close the file\n",
1328
+ "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
1329
+ ],
1330
+ "metadata": {
1331
+ "id": "iWeFnT1gAx6A"
1332
+ },
1333
+ "execution_count": null,
1334
+ "outputs": []
1335
+ },
1336
  {
1337
  "cell_type": "markdown",
1338
  "source": [
 
1382
  "my_mkdirs('/content/text_encodings/')\n",
1383
  "filename = ''\n",
1384
  "\n",
1385
+ "NUM_FILES = 10\n",
1386
  "\n",
1387
  "for file_index in range(NUM_FILES + 1):\n",
1388
  " if file_index <1: continue\n",
1389
  " #if file_index >4: break\n",
1390
+ " filename = f'🧿📘 fusion-t2i-civitai-0-20-chars-mix-{file_index}'\n",
1391
  " #🦜 fusion-t2i-prompt-features-1.json\n",
1392
  "\n",
1393
  " # Read suffix.json\n",
1394
+ " %cd /content/text-to-image-prompts/civitai-prompts/blue/text\n",
1395
  " with open(filename + '.json', 'r') as f:\n",
1396
  " data = json.load(f)\n",
1397
  " _df = pd.DataFrame({'count': data})['count']\n",
 
1427
  {
1428
  "cell_type": "code",
1429
  "source": [
1430
+ "# @title Download the created JSON as .zip file\n",
1431
  "%cd /content/\n",
1432
+ "!zip -r /content/blue.zip /content/text-to-image-prompts/civitai-prompts/blue/text"
1433
  ],
1434
  "metadata": {
1435
  "id": "gX-sHZPWj4Lt"