codeShare committed
Commit dadf5ec · verified · 1 Parent(s): a976af7

Upload sd_token_similarity_calculator.ipynb

Files changed (1):
  1. sd_token_similarity_calculator.ipynb +436 -314
sd_token_similarity_calculator.ipynb CHANGED
@@ -122,10 +122,30 @@
122
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n"
123
  ],
124
  "metadata": {
125
- "id": "rUXQ73IbonHY"
126
  },
127
- "execution_count": null,
128
- "outputs": []
129
  },
130
  {
131
  "cell_type": "code",
@@ -209,7 +229,7 @@
209
  "base_uri": "https://localhost:8080/"
210
  }
211
  },
212
- "execution_count": 5,
213
  "outputs": [
214
  {
215
  "output_type": "stream",
@@ -487,7 +507,7 @@
487
  "metadata": {
488
  "id": "xc-PbIYF428y"
489
  },
490
- "execution_count": 6,
491
  "outputs": []
492
  },
493
  {
@@ -541,7 +561,7 @@
541
  "base_uri": "https://localhost:8080/"
542
  }
543
  },
544
- "execution_count": 7,
545
  "outputs": [
546
  {
547
  "output_type": "stream",
@@ -921,6 +941,161 @@
921
  }
922
  ]
923
  },
924
  {
925
  "cell_type": "code",
926
  "source": [
@@ -1240,330 +1415,34 @@
1240
  {
1241
  "cell_type": "code",
1242
  "source": [
1243
- "# @title 💫 Compare Text encodings\n",
1244
- "prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
1245
- "prompt_B = \"bike \" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
1246
- "use_token_padding = True # param {type:\"boolean\"} <----- Enabled by default\n",
1247
- "#-----#\n",
1248
- "from transformers import AutoTokenizer\n",
1249
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\",\n",
1250
- "clean_up_tokenization_spaces = False)\n",
1251
- "#-----#\n",
1252
- "from transformers import CLIPProcessor, CLIPModel\n",
1253
- "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
1254
- "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
1255
- "#----#\n",
1256
- "inputs = tokenizer(text = prompt_A, padding=True, return_tensors=\"pt\")\n",
1257
- "text_features_A = model.get_text_features(**inputs)\n",
1258
- "text_features_A = text_features_A / text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
1259
- "name_A = prompt_A\n",
1260
- "#----#\n",
1261
- "inputs = tokenizer(text = prompt_B, padding=True, return_tensors=\"pt\")\n",
1262
- "text_features_B = model.get_text_features(**inputs)\n",
1263
- "text_features_B = text_features_B / text_features_B.norm(p=2, dim=-1, keepdim=True)\n",
1264
- "name_B = prompt_B\n",
1265
- "#----#\n",
1266
- "import torch\n",
1267
- "sim_AB = torch.nn.functional.cosine_similarity(text_features_A, text_features_B)\n",
1268
- "#----#\n",
1269
- "print(f'The similarity between the text_encoding for A:\"{prompt_A}\" and B: \"{prompt_B}\" is {round(sim_AB.item()*100,2)} %')"
1270
  ],
1271
  "metadata": {
1272
- "id": "QQOjh5BvnG8M",
1273
- "collapsed": true,
1274
- "cellView": "form"
1275
  },
1276
  "execution_count": null,
1277
  "outputs": []
1278
  },
1279
  {
1280
- "cell_type": "code",
1281
  "source": [
1282
- "# @title ⚡ Get similiar tokens (not updated yet)\n",
1283
- "import torch\n",
1284
- "from transformers import AutoTokenizer\n",
1285
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1286
  "\n",
1287
- "# @markdown Write name of token to match against\n",
1288
- "token_name = \"banana \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
1289
  "\n",
1290
- "prompt = token_name\n",
1291
- "# @markdown (optional) Mix the token with something else\n",
1292
- "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
1293
- "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
1294
- "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
1295
- "# @markdown Limit char size of included token\n",
1296
  "\n",
1297
- "min_char_size = 0 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1298
- "char_range = 50 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1299
  "\n",
1300
- "tokenizer_output = tokenizer(text = prompt)\n",
1301
- "input_ids = tokenizer_output['input_ids']\n",
1302
- "id_A = input_ids[1]\n",
1303
- "A = torch.tensor(token[id_A])\n",
1304
- "A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
1305
- "#-----#\n",
1306
- "tokenizer_output = tokenizer(text = mix_with)\n",
1307
- "input_ids = tokenizer_output['input_ids']\n",
1308
- "id_C = input_ids[1]\n",
1309
- "C = torch.tensor(token[id_C])\n",
1310
- "C = C/C.norm(p=2, dim=-1, keepdim=True)\n",
1311
- "#-----#\n",
1312
- "sim_AC = torch.dot(A,C)\n",
1313
- "#-----#\n",
1314
- "print(input_ids)\n",
1315
- "#-----#\n",
1316
  "\n",
1317
- "#if no imput exists we just randomize the entire thing\n",
1318
- "if (prompt == \"\"):\n",
1319
- " id_A = -1\n",
1320
- " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n",
1321
- " R = torch.rand(A.shape)\n",
1322
- " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1323
- " A = R\n",
1324
- " name_A = 'random_A'\n",
1325
  "\n",
1326
- "#if no imput exists we just randomize the entire thing\n",
1327
- "if (mix_with == \"\"):\n",
1328
- " id_C = -1\n",
1329
- " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n",
1330
- " R = torch.rand(A.shape)\n",
1331
- " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1332
- " C = R\n",
1333
- " name_C = 'random_C'\n",
1334
  "\n",
1335
- "name_A = \"A of random type\"\n",
1336
- "if (id_A>-1):\n",
1337
- " name_A = vocab(id_A)\n",
1338
  "\n",
1339
- "name_C = \"token C of random type\"\n",
1340
- "if (id_C>-1):\n",
1341
- " name_C = vocab(id_C)\n",
1342
- "\n",
1343
- "print(f\"The similarity between A '{name_A}' and C '{name_C}' is {round(sim_AC.item()*100,2)} %\")\n",
1344
- "\n",
1345
- "if (mix_method == \"None\"):\n",
1346
- " print(\"No operation\")\n",
1347
- "\n",
1348
- "if (mix_method == \"Average\"):\n",
1349
- " A = w*A + (1-w)*C\n",
1350
- " _A = LA.vector_norm(A, ord=2)\n",
1351
- " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n",
1352
- "\n",
1353
- "if (mix_method == \"Subtract\"):\n",
1354
- " tmp = w*A - (1-w)*C\n",
1355
- " tmp = tmp/tmp.norm(p=2, dim=-1, keepdim=True)\n",
1356
- " A = tmp\n",
1357
- " #//---//\n",
1358
- " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n",
1359
- "\n",
1360
- "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n",
1361
- "\n",
1362
- "dots = torch.zeros(NUM_TOKENS)\n",
1363
- "for index in range(NUM_TOKENS):\n",
1364
- " id_B = index\n",
1365
- " B = torch.tensor(token[id_B])\n",
1366
- " B = B/B.norm(p=2, dim=-1, keepdim=True)\n",
1367
- " sim_AB = torch.dot(A,B)\n",
1368
- " dots[index] = sim_AB\n",
1369
- "\n",
1370
- "\n",
1371
- "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
1372
- "#----#\n",
1373
- "if (mix_method == \"Average\"):\n",
1374
- " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1375
- "if (mix_method == \"Subtract\"):\n",
1376
- " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1377
- "if (mix_method == \"None\"):\n",
1378
- " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
1379
- "\n",
1380
- "#Produce a list id IDs that are most similiar to the prompt ID at positiion 1 based on above result\n",
1381
- "\n",
1382
- "# @markdown Set print options\n",
1383
- "list_size = 100 # @param {type:'number'}\n",
1384
- "print_ID = False # @param {type:\"boolean\"}\n",
1385
- "print_Similarity = True # @param {type:\"boolean\"}\n",
1386
- "print_Name = True # @param {type:\"boolean\"}\n",
1387
- "print_Divider = True # @param {type:\"boolean\"}\n",
1388
- "\n",
1389
- "\n",
1390
- "if (print_Divider):\n",
1391
- " print('//---//')\n",
1392
- "\n",
1393
- "print('')\n",
1394
- "print('Here is the result : ')\n",
1395
- "print('')\n",
1396
- "\n",
1397
- "for index in range(list_size):\n",
1398
- " id = indices[index].item()\n",
1399
- " if (print_Name):\n",
1400
- " print(f'{vocab(id)}') # vocab item\n",
1401
- " if (print_ID):\n",
1402
- " print(f'ID = {id}') # IDs\n",
1403
- " if (print_Similarity):\n",
1404
- " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
1405
- " if (print_Divider):\n",
1406
- " print('--------')\n",
1407
- "\n",
1408
- "#Print the sorted list from above result\n",
1409
- "\n",
1410
- "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
1411
- "\n",
1412
- "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID.\n",
1413
- "\n",
1414
- "# Save results as .db file\n",
1415
- "import shelve\n",
1416
- "VOCAB_FILENAME = 'tokens_most_similiar_to_' + name_A.replace('</w>','').strip()\n",
1417
- "d = shelve.open(VOCAB_FILENAME)\n",
1418
- "#NUM TOKENS == 49407\n",
1419
- "for index in range(NUM_TOKENS):\n",
1420
- " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
1421
- " d[f'{index}']= vocab(indices[index].item()) #<---- write values to .db file\n",
1422
- "#----#\n",
1423
- "d.close() #close the file\n",
1424
- "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
1425
- ],
1426
- "metadata": {
1427
- "id": "iWeFnT1gAx6A"
1428
- },
1429
- "execution_count": null,
1430
- "outputs": []
1431
- },
1432
- {
1433
- "cell_type": "markdown",
1434
- "source": [
1435
- "You can write an url or upload a file locally from your device to use as reference. The image will by saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime."
1436
- ],
1437
- "metadata": {
1438
- "id": "hyK423TQCRup"
1439
- }
1440
- },
1441
- {
1442
- "cell_type": "code",
1443
- "source": [
1444
- "%cd /content/\n",
1445
- "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
1446
- ],
1447
- "metadata": {
1448
- "id": "GPAUFxZgPfrY"
1449
- },
1450
- "execution_count": null,
1451
- "outputs": []
1452
- },
1453
- {
1454
- "cell_type": "code",
1455
- "source": [
1456
- "# @title Make your own text_encodings .pt file for later use (using GPU is recommended to speed things up)\n",
1457
- "\n",
1458
- "import json\n",
1459
- "import pandas as pd\n",
1460
- "import os\n",
1461
- "import shelve\n",
1462
- "import torch\n",
1463
- "from safetensors.torch import save_file\n",
1464
- "\n",
1465
- "def my_mkdirs(folder):\n",
1466
- " if os.path.exists(folder)==False:\n",
1467
- " os.makedirs(folder)\n",
1468
- "\n",
1469
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1470
- "from transformers import AutoTokenizer\n",
1471
- "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1472
- "from transformers import CLIPProcessor, CLIPModel\n",
1473
- "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
1474
- "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\").to(device)\n",
1475
- "\n",
1476
- "%cd /content/\n",
1477
- "\n",
1478
- "my_mkdirs('/content/text_encodings/')\n",
1479
- "filename = ''\n",
1480
- "\n",
1481
- "NUM_FILES = 10\n",
1482
- "\n",
1483
- "for file_index in range(NUM_FILES + 1):\n",
1484
- " if file_index <1: continue\n",
1485
- " #if file_index >4: break\n",
1486
- " filename = f'🧿📘 fusion-t2i-civitai-0-20-chars-mix-{file_index}'\n",
1487
- " #🦜 fusion-t2i-prompt-features-1.json\n",
1488
- "\n",
1489
- " # Read suffix.json\n",
1490
- " %cd /content/text-to-image-prompts/civitai-prompts/blue/text\n",
1491
- " with open(filename + '.json', 'r') as f:\n",
1492
- " data = json.load(f)\n",
1493
- " _df = pd.DataFrame({'count': data})['count']\n",
1494
- " prompts = {\n",
1495
- " key : value for key, value in _df.items()\n",
1496
- " }\n",
1497
- " NUM_ITEMS = int(prompts[\"0\"])\n",
1498
- " #------#\n",
1499
- "\n",
1500
- " # Calculate text_encoding for .json file contents and results as .db file\n",
1501
- "\n",
1502
- " %cd /content/text_encodings/\n",
1503
- " text_encoding_dict = {}\n",
1504
- " for index in range(NUM_ITEMS + 1):\n",
1505
- " inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
1506
- " text_features = model.get_text_features(**inputs).to(device)\n",
1507
- " text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
1508
- " text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
1509
- " save_file(text_encoding_dict, f'{filename}.safetensors')\n",
1510
- " #----#\n",
1511
- "\n",
1512
- "#from safetensors.torch import load_file\n",
1513
- "#%cd /content/text_encodings\n",
1514
- "#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
1515
- "#print(loaded[\"325\"])"
1516
- ],
1517
- "metadata": {
1518
- "id": "9ZiTsF9jV0TV"
1519
- },
1520
- "execution_count": null,
1521
- "outputs": []
1522
- },
1523
- {
1524
- "cell_type": "code",
1525
- "source": [
1526
- "# @title Download the created JSON as .zip file\n",
1527
- "%cd /content/\n",
1528
- "!zip -r /content/blue.zip /content/text-to-image-prompts/civitai-prompts/blue/text"
1529
- ],
1530
- "metadata": {
1531
- "id": "gX-sHZPWj4Lt"
1532
- },
1533
- "execution_count": null,
1534
- "outputs": []
1535
- },
1536
- {
1537
- "cell_type": "code",
1538
- "source": [
1539
- "# @title Download the created text_encodings as .zip file\n",
1540
- "%cd /content/\n",
1541
- "!zip -r /content/text-encodings.zip /content/text_encodings"
1542
- ],
1543
- "metadata": {
1544
- "id": "b3DUPYfskAIc"
1545
- },
1546
- "execution_count": null,
1547
- "outputs": []
1548
- },
1549
- {
1550
- "cell_type": "markdown",
1551
- "source": [
1552
- "\n",
1553
- "\n",
1554
- "# How does this notebook work?\n",
1555
- "\n",
1556
- "Similiar vectors = similiar output in the SD 1.5 / SDXL / FLUX model\n",
1557
- "\n",
1558
- "CLIP converts the prompt text to vectors (“tensors”) , with float32 values usually ranging from -1 to 1.\n",
1559
- "\n",
1560
- "Dimensions are \\[ 1x768 ] tensors for SD 1.5 , and a \\[ 1x768 , 1x1024 ] tensor for SDXL and FLUX.\n",
1561
- "\n",
1562
- "The SD models and FLUX converts these vectors to an image.\n",
1563
- "\n",
1564
- "This notebook takes an input string , tokenizes it and matches the first token against the 49407 token vectors in the vocab.json : [https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main/tokenizer](https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fblack-forest-labs%2FFLUX.1-dev%2Ftree%2Fmain%2Ftokenizer)\n",
1565
- "\n",
1566
- "It finds the “most similiar tokens” in the list. Similarity is the theta angle between the token vectors.\n",
1567
  "\n",
1568
  "<div>\n",
1569
  "<img src=\"https://huggingface.co/datasets/codeShare/sd_tokens/resolve/main/cosine.jpeg\" width=\"300\"/>\n",
@@ -1956,6 +1835,249 @@
1956
  },
1957
  "execution_count": null,
1958
  "outputs": []
1959
  }
1960
  ]
1961
  }
 
122
  "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts\n"
123
  ],
124
  "metadata": {
125
+ "id": "rUXQ73IbonHY",
126
+ "outputId": "aa0e25d1-f6b8-46ad-c1c1-0ccd70952cff",
127
+ "colab": {
128
+ "base_uri": "https://localhost:8080/"
129
+ }
130
  },
131
+ "execution_count": 1,
132
+ "outputs": [
133
+ {
134
+ "output_type": "stream",
135
+ "name": "stdout",
136
+ "text": [
137
+ "/content\n",
138
+ "Cloning into 'text-to-image-prompts'...\n",
139
+ "remote: Enumerating objects: 478, done.\u001b[K\n",
140
+ "remote: Counting objects: 100% (475/475), done.\u001b[K\n",
141
+ "remote: Compressing objects: 100% (452/452), done.\u001b[K\n",
142
+ "remote: Total 478 (delta 82), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
143
+ "Receiving objects: 100% (478/478), 1.93 MiB | 6.96 MiB/s, done.\n",
144
+ "Resolving deltas: 100% (82/82), done.\n",
145
+ "Filtering content: 100% (95/95), 305.98 MiB | 59.56 MiB/s, done.\n"
146
+ ]
147
+ }
148
+ ]
149
  },
150
  {
151
  "cell_type": "code",
 
229
  "base_uri": "https://localhost:8080/"
230
  }
231
  },
232
+ "execution_count": null,
233
  "outputs": [
234
  {
235
  "output_type": "stream",
 
507
  "metadata": {
508
  "id": "xc-PbIYF428y"
509
  },
510
+ "execution_count": null,
511
  "outputs": []
512
  },
513
  {
 
561
  "base_uri": "https://localhost:8080/"
562
  }
563
  },
564
+ "execution_count": null,
565
  "outputs": [
566
  {
567
  "output_type": "stream",
 
941
  }
942
  ]
943
  },
944
+ {
945
+ "cell_type": "code",
946
+ "source": [
947
+ "# @title 💫 Compare Text encodings\n",
948
+ "prompt_A = \"banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
949
+ "prompt_B = \"bike \" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
950
+ "use_token_padding = True # param {type:\"boolean\"} <----- Enabled by default\n",
951
+ "#-----#\n",
952
+ "from transformers import AutoTokenizer\n",
953
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\",\n",
954
+ "clean_up_tokenization_spaces = False)\n",
955
+ "#-----#\n",
956
+ "from transformers import CLIPProcessor, CLIPModel\n",
957
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
958
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
959
+ "#----#\n",
960
+ "inputs = tokenizer(text = prompt_A, padding=True, return_tensors=\"pt\")\n",
961
+ "text_features_A = model.get_text_features(**inputs)\n",
962
+ "text_features_A = text_features_A / text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
963
+ "name_A = prompt_A\n",
964
+ "#----#\n",
965
+ "inputs = tokenizer(text = prompt_B, padding=True, return_tensors=\"pt\")\n",
966
+ "text_features_B = model.get_text_features(**inputs)\n",
967
+ "text_features_B = text_features_B / text_features_B.norm(p=2, dim=-1, keepdim=True)\n",
968
+ "name_B = prompt_B\n",
969
+ "#----#\n",
970
+ "import torch\n",
971
+ "sim_AB = torch.nn.functional.cosine_similarity(text_features_A, text_features_B)\n",
972
+ "#----#\n",
973
+ "print(f'The similarity between the text_encoding for A:\"{prompt_A}\" and B: \"{prompt_B}\" is {round(sim_AB.item()*100,2)} %')"
974
+ ],
975
+ "metadata": {
976
+ "id": "QQOjh5BvnG8M",
977
+ "collapsed": true,
978
+ "cellView": "form"
979
+ },
980
+ "execution_count": null,
981
+ "outputs": []
982
+ },
983
+ {
984
+ "cell_type": "markdown",
985
+ "source": [
986
+ "You can write an url or upload a file locally from your device to use as reference. The image will by saved in the 'sd_tokens' folder. Note that the 'sd_tokens' folder will be deleted upon exiting this runtime."
987
+ ],
988
+ "metadata": {
989
+ "id": "hyK423TQCRup"
990
+ }
991
+ },
992
+ {
993
+ "cell_type": "code",
994
+ "source": [
995
+ "%cd /content/\n",
996
+ "!git clone https://huggingface.co/datasets/codeShare/text-to-image-prompts"
997
+ ],
998
+ "metadata": {
999
+ "id": "GPAUFxZgPfrY"
1000
+ },
1001
+ "execution_count": null,
1002
+ "outputs": []
1003
+ },
1004
+ {
1005
+ "cell_type": "code",
1006
+ "source": [
1007
+ "# @title Make your own text_encodings .pt file for later use (using GPU is recommended to speed things up)\n",
1008
+ "\n",
1009
+ "import json\n",
1010
+ "import pandas as pd\n",
1011
+ "import os\n",
1012
+ "import shelve\n",
1013
+ "import torch\n",
1014
+ "from safetensors.torch import save_file\n",
1015
+ "\n",
1016
+ "def my_mkdirs(folder):\n",
1017
+ " if os.path.exists(folder)==False:\n",
1018
+ " os.makedirs(folder)\n",
1019
+ "\n",
1020
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
1021
+ "from transformers import AutoTokenizer\n",
1022
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1023
+ "from transformers import CLIPProcessor, CLIPModel\n",
1024
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
1025
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\").to(device)\n",
1026
+ "\n",
1027
+ "%cd /content/\n",
1028
+ "\n",
1029
+ "my_mkdirs('/content/text_encodings/')\n",
1030
+ "filename = ''\n",
1031
+ "\n",
1032
+ "NUM_FILES = 9\n",
1033
+ "\n",
1034
+ "\n",
1035
+ "filename = '🆔👩_🦰 fusion-t2i-girl-firstname-1'\n",
1036
+ "%cd /content/text-to-image-prompts/names/firstnames/text\n",
1037
+ "with open(filename + '.json', 'r') as f:\n",
1038
+ " data = json.load(f)\n",
1039
+ "_df = pd.DataFrame({'count': data})['count']\n",
1040
+ "firstname = {\n",
1041
+ " key : value for key, value in _df.items()\n",
1042
+ "}\n",
1043
+ "\n",
1044
+ "NUM_FIRSTNAME = 100901\n",
1045
+ "\n",
1046
+ "for file_index in range(NUM_FILES + 1):\n",
1047
+ " if file_index <1: continue\n",
1048
+ " #if file_index >4: break\n",
1049
+ " filename = f'👱_♀️ fusion-t2i-lastnames-1 plugin-{file_index}'\n",
1050
+ " #🦜 fusion-t2i-prompt-features-1.json\n",
1051
+ "\n",
1052
+ " # Read suffix.json\n",
1053
+ " %cd /content/text-to-image-prompts/names/lastnames/text\n",
1054
+ " with open(filename + '.json', 'r') as f:\n",
1055
+ " data = json.load(f)\n",
1056
+ " _df = pd.DataFrame({'count': data})['count']\n",
1057
+ " names = {\n",
1058
+ " key : firstname[f'{random.randint(2,NUM_FIRSTNAME)}'] + f'{value}' for key, value in _df.items()\n",
1059
+ " }\n",
1060
+ " NUM_ITEMS = int(prompts[\"0\"])\n",
1061
+ " #------#\n",
1062
+ "\n",
1063
+ " # Calculate text_encoding for .json file contents and results as .db file\n",
1064
+ "\n",
1065
+ " %cd /content/text_encodings/\n",
1066
+ " text_encoding_dict = {}\n",
1067
+ " for index in range(NUM_ITEMS + 1):\n",
1068
+ " inputs = tokenizer(text = '' + prompts[f'{index}'], padding=True, return_tensors=\"pt\").to(device)\n",
1069
+ " text_features = model.get_text_features(**inputs).to(device)\n",
1070
+ " text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True).to(device)\n",
1071
+ " text_encoding_dict[f'{index}'] = text_features.to('cpu')\n",
1072
+ " save_file(text_encoding_dict, f'{filename}.safetensors')\n",
1073
+ " #----#\n",
1074
+ "\n",
1075
+ "#from safetensors.torch import load_file\n",
1076
+ "#%cd /content/text_encodings\n",
1077
+ "#loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')\n",
1078
+ "#print(loaded[\"325\"])"
1079
+ ],
1080
+ "metadata": {
1081
+ "id": "9ZiTsF9jV0TV"
1082
+ },
1083
+ "execution_count": null,
1084
+ "outputs": []
1085
+ },
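A minimal sketch of consuming such a file, assuming the '🦜 fusion-t2i-prompt-features-1.safetensors' filename from the commented-out example above and only the public CLIP checkpoint (the query prompt is illustrative):

import torch
from safetensors.torch import load_file
from transformers import AutoTokenizer, CLIPModel

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Encode a query prompt the same way the cell above encodes its items
inputs = tokenizer(text="a photo of a banana", padding=True, return_tensors="pt")
with torch.no_grad():
    q = model.get_text_features(**inputs)
q = q / q.norm(p=2, dim=-1, keepdim=True)  # unit length, shape [1, 768]

# Each stored value is a pre-normalized [1, 768] text encoding keyed by index
loaded = load_file('🦜 fusion-t2i-prompt-features-1.safetensors')
sims = {key: torch.nn.functional.cosine_similarity(q, v).item()
        for key, v in loaded.items()}
best = max(sims, key=sims.get)
print(f'closest stored encoding: item {best} at {round(sims[best]*100, 2)} %')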
1086
+ {
1087
+ "cell_type": "code",
1088
+ "source": [
1089
+ "# @title Download the created JSON as .zip file\n",
1090
+ "%cd /content/\n",
1091
+ "!zip -r /content/blue.zip /content/text-to-image-prompts/civitai-prompts/blue/text"
1092
+ ],
1093
+ "metadata": {
1094
+ "id": "gX-sHZPWj4Lt"
1095
+ },
1096
+ "execution_count": null,
1097
+ "outputs": []
1098
+ },
1099
  {
1100
  "cell_type": "code",
1101
  "source": [
 
1415
  {
1416
  "cell_type": "code",
1417
  "source": [
1418
+ "# @title Download the created text_encodings as .zip file\n",
1419
+ "%cd /content/\n",
1420
+ "!zip -r /content/text-encodings.zip /content/text_encodings"
1421
  ],
1422
  "metadata": {
1423
+ "id": "b3DUPYfskAIc"
1424
  },
1425
  "execution_count": null,
1426
  "outputs": []
1427
  },
1428
  {
1429
+ "cell_type": "markdown",
1430
  "source": [
1431
  "\n",
1432
  "\n",
1433
+ "# How does this notebook work?\n",
1434
  "\n",
1435
+ "Similiar vectors = similiar output in the SD 1.5 / SDXL / FLUX model\n",
1436
  "\n",
1437
+ "CLIP converts the prompt text to vectors (“tensors”) , with float32 values usually ranging from -1 to 1.\n",
1438
  "\n",
1439
+ "Dimensions are \\[ 1x768 ] tensors for SD 1.5 , and a \\[ 1x768 , 1x1024 ] tensor for SDXL and FLUX.\n",
1440
  "\n",
1441
+ "The SD models and FLUX converts these vectors to an image.\n",
1442
  "\n",
1443
+ "This notebook takes an input string , tokenizes it and matches the first token against the 49407 token vectors in the vocab.json : [https://huggingface.co/black-forest-labs/FLUX.1-dev/tree/main/tokenizer](https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fblack-forest-labs%2FFLUX.1-dev%2Ftree%2Fmain%2Ftokenizer)\n",
1444
  "\n",
1445
+ "It finds the “most similiar tokens” in the list. Similarity is the theta angle between the token vectors.\n",
1446
  "\n",
1447
  "<div>\n",
1448
  "<img src=\"https://huggingface.co/datasets/codeShare/sd_tokens/resolve/main/cosine.jpeg\" width=\"300\"/>\n",
 
1835
  },
1836
  "execution_count": null,
1837
  "outputs": []
1838
+ },
1839
+ {
1840
+ "cell_type": "code",
1841
+ "source": [
1842
+ "# @title ⚡ Get similiar tokens (not updated yet)\n",
1843
+ "import torch\n",
1844
+ "from transformers import AutoTokenizer\n",
1845
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1846
+ "\n",
1847
+ "# @markdown Write name of token to match against\n",
1848
+ "token_name = \"banana \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
1849
+ "\n",
1850
+ "prompt = token_name\n",
1851
+ "# @markdown (optional) Mix the token with something else\n",
1852
+ "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
1853
+ "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
1854
+ "w = 0.5 # @param {type:\"slider\", min:0, max:1, step:0.01}\n",
1855
+ "# @markdown Limit char size of included token\n",
1856
+ "\n",
1857
+ "min_char_size = 0 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1858
+ "char_range = 50 # param {type:\"slider\", min:0, max: 50, step:1}\n",
1859
+ "\n",
1860
+ "tokenizer_output = tokenizer(text = prompt)\n",
1861
+ "input_ids = tokenizer_output['input_ids']\n",
1862
+ "id_A = input_ids[1]\n",
1863
+ "A = torch.tensor(token[id_A])\n",
1864
+ "A = A/A.norm(p=2, dim=-1, keepdim=True)\n",
1865
+ "#-----#\n",
1866
+ "tokenizer_output = tokenizer(text = mix_with)\n",
1867
+ "input_ids = tokenizer_output['input_ids']\n",
1868
+ "id_C = input_ids[1]\n",
1869
+ "C = torch.tensor(token[id_C])\n",
1870
+ "C = C/C.norm(p=2, dim=-1, keepdim=True)\n",
1871
+ "#-----#\n",
1872
+ "sim_AC = torch.dot(A,C)\n",
1873
+ "#-----#\n",
1874
+ "print(input_ids)\n",
1875
+ "#-----#\n",
1876
+ "\n",
1877
+ "#if no imput exists we just randomize the entire thing\n",
1878
+ "if (prompt == \"\"):\n",
1879
+ " id_A = -1\n",
1880
+ " print(\"Tokenized prompt tensor A is a random valued tensor with no ID\")\n",
1881
+ " R = torch.rand(A.shape)\n",
1882
+ " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1883
+ " A = R\n",
1884
+ " name_A = 'random_A'\n",
1885
+ "\n",
1886
+ "#if no imput exists we just randomize the entire thing\n",
1887
+ "if (mix_with == \"\"):\n",
1888
+ " id_C = -1\n",
1889
+ " print(\"Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\")\n",
1890
+ " R = torch.rand(A.shape)\n",
1891
+ " R = R/R.norm(p=2, dim=-1, keepdim=True)\n",
1892
+ " C = R\n",
1893
+ " name_C = 'random_C'\n",
1894
+ "\n",
1895
+ "name_A = \"A of random type\"\n",
1896
+ "if (id_A>-1):\n",
1897
+ " name_A = vocab(id_A)\n",
1898
+ "\n",
1899
+ "name_C = \"token C of random type\"\n",
1900
+ "if (id_C>-1):\n",
1901
+ " name_C = vocab(id_C)\n",
1902
+ "\n",
1903
+ "print(f\"The similarity between A '{name_A}' and C '{name_C}' is {round(sim_AC.item()*100,2)} %\")\n",
1904
+ "\n",
1905
+ "if (mix_method == \"None\"):\n",
1906
+ " print(\"No operation\")\n",
1907
+ "\n",
1908
+ "if (mix_method == \"Average\"):\n",
1909
+ " A = w*A + (1-w)*C\n",
1910
+ " _A = LA.vector_norm(A, ord=2)\n",
1911
+ " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = w*A + (1-w)*C , where C is '{name_C}' token , for w = {w} \")\n",
1912
+ "\n",
1913
+ "if (mix_method == \"Subtract\"):\n",
1914
+ " tmp = w*A - (1-w)*C\n",
1915
+ " tmp = tmp/tmp.norm(p=2, dim=-1, keepdim=True)\n",
1916
+ " A = tmp\n",
1917
+ " #//---//\n",
1918
+ " print(f\"Tokenized prompt tensor A '{name_A}' token has been recalculated as A = _A*norm(w*A - (1-w)*C) , where C is '{name_C}' token , for w = {w} \")\n",
1919
+ "\n",
1920
+ "#OPTIONAL : Add/subtract + normalize above result with another token. Leave field empty to get a random value tensor\n",
1921
+ "\n",
1922
+ "dots = torch.zeros(NUM_TOKENS)\n",
1923
+ "for index in range(NUM_TOKENS):\n",
1924
+ " id_B = index\n",
1925
+ " B = torch.tensor(token[id_B])\n",
1926
+ " B = B/B.norm(p=2, dim=-1, keepdim=True)\n",
1927
+ " sim_AB = torch.dot(A,B)\n",
1928
+ " dots[index] = sim_AB\n",
1929
+ "\n",
1930
+ "\n",
1931
+ "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
1932
+ "#----#\n",
1933
+ "if (mix_method == \"Average\"):\n",
1934
+ " print(f'Calculated all cosine-similarities between the average of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1935
+ "if (mix_method == \"Subtract\"):\n",
1936
+ " print(f'Calculated all cosine-similarities between the subtract of token {name_A} and {name_C} with Id_A = {id_A} and mixed Id_C = {id_C} as a 1x{sorted.shape[0]} tensor')\n",
1937
+ "if (mix_method == \"None\"):\n",
1938
+ " print(f'Calculated all cosine-similarities between the token {name_A} with Id_A = {id_A} with the the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
1939
+ "\n",
1940
+ "#Produce a list id IDs that are most similiar to the prompt ID at positiion 1 based on above result\n",
1941
+ "\n",
1942
+ "# @markdown Set print options\n",
1943
+ "list_size = 100 # @param {type:'number'}\n",
1944
+ "print_ID = False # @param {type:\"boolean\"}\n",
1945
+ "print_Similarity = True # @param {type:\"boolean\"}\n",
1946
+ "print_Name = True # @param {type:\"boolean\"}\n",
1947
+ "print_Divider = True # @param {type:\"boolean\"}\n",
1948
+ "\n",
1949
+ "\n",
1950
+ "if (print_Divider):\n",
1951
+ " print('//---//')\n",
1952
+ "\n",
1953
+ "print('')\n",
1954
+ "print('Here is the result : ')\n",
1955
+ "print('')\n",
1956
+ "\n",
1957
+ "for index in range(list_size):\n",
1958
+ " id = indices[index].item()\n",
1959
+ " if (print_Name):\n",
1960
+ " print(f'{vocab(id)}') # vocab item\n",
1961
+ " if (print_ID):\n",
1962
+ " print(f'ID = {id}') # IDs\n",
1963
+ " if (print_Similarity):\n",
1964
+ " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
1965
+ " if (print_Divider):\n",
1966
+ " print('--------')\n",
1967
+ "\n",
1968
+ "#Print the sorted list from above result\n",
1969
+ "\n",
1970
+ "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
1971
+ "\n",
1972
+ "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID.\n",
1973
+ "\n",
1974
+ "# Save results as .db file\n",
1975
+ "import shelve\n",
1976
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_' + name_A.replace('</w>','').strip()\n",
1977
+ "d = shelve.open(VOCAB_FILENAME)\n",
1978
+ "#NUM TOKENS == 49407\n",
1979
+ "for index in range(NUM_TOKENS):\n",
1980
+ " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
1981
+ " d[f'{index}']= vocab(indices[index].item()) #<---- write values to .db file\n",
1982
+ "#----#\n",
1983
+ "d.close() #close the file\n",
1984
+ "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
1985
+ ],
1986
+ "metadata": {
1987
+ "id": "iWeFnT1gAx6A"
1988
+ },
1989
+ "execution_count": null,
1990
+ "outputs": []
1991
+ },
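The cell above writes the ranked vocab entries into a shelve .db file keyed by rank. A short sketch of reading the top of that list back, assuming the cell ran with token_name = "banana " so that VOCAB_FILENAME resolves to the name below:

import shelve

# Filename follows the VOCAB_FILENAME pattern from the cell above
with shelve.open('tokens_most_similiar_to_banana') as d:
    for rank in range(10):  # ten most similar vocab entries, best first
        print(rank, ':', d[str(rank)])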
1992
+ {
1993
+ "cell_type": "code",
1994
+ "source": [
1995
+ "\n",
1996
+ "# @title Create random names from firstname and lastnames\n",
1997
+ "import random\n",
1998
+ "import json\n",
1999
+ "import pandas as pd\n",
2000
+ "import os\n",
2001
+ "import shelve\n",
2002
+ "import torch\n",
2003
+ "from safetensors.torch import save_file\n",
2004
+ "\n",
2005
+ "def my_mkdirs(folder):\n",
2006
+ " if os.path.exists(folder)==False:\n",
2007
+ " os.makedirs(folder)\n",
2008
+ "\n",
2009
+ "\n",
2010
+ "my_mkdirs('/content/female_full_names/')\n",
2011
+ "filename = ''\n",
2012
+ "\n",
2013
+ "filename = '🆔👩_🦰 fusion-t2i-girl-firstname-1'\n",
2014
+ "%cd /content/text-to-image-prompts/names/firstnames/text\n",
2015
+ "with open(filename + '.json', 'r') as f:\n",
2016
+ " data = json.load(f)\n",
2017
+ "_df = pd.DataFrame({'count': data})['count']\n",
2018
+ "firstname = {\n",
2019
+ " key : value for key, value in _df.items()\n",
2020
+ "}\n",
2021
+ "\n",
2022
+ "NUM_FIRSTNAME = 100901\n",
2023
+ "\n",
2024
+ "\n",
2025
+ "NUM_FILES = 9\n",
2026
+ "for file_index in range(NUM_FILES + 1):\n",
2027
+ " if file_index <1: continue\n",
2028
+ " #if file_index >4: break\n",
2029
+ " filename = f'👱_♀️ fusion-t2i-lastnames-{file_index} plugin'\n",
2030
+ " #🦜 fusion-t2i-prompt-features-1.json\n",
2031
+ "\n",
2032
+ " # Read suffix.json\n",
2033
+ " %cd /content/text-to-image-prompts/names/lastnames/text\n",
2034
+ " with open(filename + '.json', 'r') as f:\n",
2035
+ " data = json.load(f)\n",
2036
+ " _df = pd.DataFrame({'count': data})['count']\n",
2037
+ " names = {\n",
2038
+ " key : firstname[f'{random.randint(2,NUM_FIRSTNAME)}'] + ' ' f'{value}' + ' ' for key, value in _df.items()\n",
2039
+ " }\n",
2040
+ "\n",
2041
+ " index = 0\n",
2042
+ "\n",
2043
+ " for key in names:\n",
2044
+ " index = index + 1\n",
2045
+ " #-----#\n",
2046
+ "\n",
2047
+ " names[f'{1}'] = f'👱_♀️female_fullnames-{file_index}'\n",
2048
+ " names[f'{0}'] = f'{index}'\n",
2049
+ "\n",
2050
+ " txt_filename = f'👱_♀️female_fullnames-{file_index}'\n",
2051
+ " %cd /content/female_full_names/\n",
2052
+ " with open(txt_filename + '.txt', 'w') as f:\n",
2053
+ " f.write(str(names))\n",
2054
+ "\n",
2055
+ " #files.download(f'fullnames-{file_index}.txt')\n",
2056
+ "\n",
2057
+ "#firstname[f'{random.randint(2,NUM_FIRSTNAME)}'] + f'{value}'\n",
2058
+ "\n",
2059
+ " #------#\n",
2060
+ "\n",
2061
+ "\n"
2062
+ ],
2063
+ "metadata": {
2064
+ "id": "JR0wl2ecj6RJ"
2065
+ },
2066
+ "execution_count": null,
2067
+ "outputs": []
2068
+ },
2069
+ {
2070
+ "cell_type": "code",
2071
+ "source": [
2072
+ "# @title Download the created text_encodings as .zip file\n",
2073
+ "%cd /content/\n",
2074
+ "!zip -r /content/female_full_names.zip /content/female_full_names/"
2075
+ ],
2076
+ "metadata": {
2077
+ "id": "IBenvYVrofil"
2078
+ },
2079
+ "execution_count": null,
2080
+ "outputs": []
2081
  }
2082
  ]
2083
  }