Upload sd_token_similarity_calculator.ipynb
sd_token_similarity_calculator.ipynb
CHANGED
@@ -37,6 +37,9 @@
  "%cd /content/sd_tokens\n",
  "token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)\n",
  "#-----#\n",
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_girl'\n",
+ "ACTIVE_IMG = ''\n",
+ "#-----#\n",
  "\n",
  "#Import the vocab.json\n",
  "import json\n",
@@ -130,7 +133,9 @@
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
  "\n",
  "# @markdown Write name of token to match against\n",
- "
+ "token_name = \"banana\" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
+ "\n",
+ "prompt = token_name\n",
  "# @markdown (optional) Mix the token with something else\n",
  "mix_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for random value token\"}\n",
  "mix_method = \"None\" # @param [\"None\" , \"Average\", \"Subtract\"] {allow-input: true}\n",
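Note on the hunk above: token_name feeds prompt, which is then encoded by the CLIP tokenizer loaded on the first line. A minimal sketch of that encoding step, using only the load call shown here (the middle IDs depend on the prompt; the 49406/49407 wrapping is the behaviour described in the next hunk's comment):

from transformers import AutoTokenizer

# Same tokenizer as the cell above; clean_up_tokenization_spaces=False keeps raw BPE tokens.
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14",
                                          clean_up_tokenization_spaces=False)

ids = tokenizer("banana")["input_ids"]
print(ids)  # [49406, <id of 'banana</w>'>, 49407] -- wrapped in <|startoftext|> / <|endoftext|>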
@@ -252,10 +257,23 @@
  "\n",
  "#The prompt will be enclosed with the <|start-of-text|> and <|end-of-text|> tokens, which is why output will be [49406, ... , 49407].\n",
  "\n",
- "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID
+ "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID.\n",
+ "\n",
+ "# Save results as .db file\n",
+ "import shelve\n",
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_' + name_A.replace('</w>','').strip()\n",
+ "d = shelve.open(VOCAB_FILENAME)\n",
+ "#NUM TOKENS == 49407\n",
+ "for index in range(NUM_TOKENS):\n",
+ " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
+ " d[f'{index}']= vocab[indices[index].item()] #<---- write values to .db file\n",
+ "#----#\n",
+ "d.close() #close the file\n",
+ "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
  ],
  "metadata": {
- "id": "iWeFnT1gAx6A"
+ "id": "iWeFnT1gAx6A",
+ "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
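The commented-out print inside the loop above is the read path for the same .db. A short read-back sketch, assuming the cell already ran with the default reference token so that name_A was 'girl</w>' (that filename is an assumption here; index 0 holds the most similar token):

import shelve

# Reopen the shelve file written by the cell above and list the ten closest tokens.
d = shelve.open('tokens_most_similiar_to_girl')
for index in range(10):
    print(d[f'{index}'])  # values were stored as vocab[indices[index].item()]
d.close()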
@@ -268,7 +286,8 @@
  "It takes a long while to check all the tokens (too long!) so this cell only samples a range of the 49K available tokens.\n",
  "\n",
  "You can run this cell, then paste the result into the 'must_contain' box , and then run the cell again.\n",
- "\n"
+ "\n",
+ "Check the sd_tokens folder for stored .db files from running the '⚡ Get similiar tokens' cell. These can be used in the ⚡+🖼️ -> 📝 Token-Sampling Image interrogator cell\n"
  ],
  "metadata": {
  "id": "IUCuV9RtQpBn"
@@ -277,33 +296,12 @@
  {
  "cell_type": "code",
  "source": [
- "# @title
- "import shelve\n",
- "d = shelve.open('tokens_most_similiar_to_' + name_A.replace('</w>','').strip())\n",
- "#NUM TOKENS == 49407\n",
- "for index in range(NUM_TOKENS):\n",
- " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
- " d[f'{index}']= vocab[indices[index].item()] #<---- write values to .db file\n",
- "#----#\n",
- "d.close() #close the file\n",
- "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
- ],
- "metadata": {
- "id": "qj888fPEbX8K"
- },
- "execution_count": 15,
- "outputs": []
- },
- {
- "cell_type": "code",
- "source": [
- "# @title 🪐🖼️ -> 📝 Token-Sampling Image interrogator\n",
- "VOCAB_FILENAME = 'tokens_most_similiar_to_girl' #This vocab has been ordered where lowest index has the highest similarity to the reference vector \"girl</w>\". Feel free to create your own .db around a target token in above cells.\n",
+ "# @title ⚡+🖼️ -> 📝 Token-Sampling Image interrogator\n",
  "#-----#\n",
  "import shelve\n",
  "db_vocab = shelve.open(VOCAB_FILENAME)\n",
  "# @markdown # What do you want to to mimic?\n",
- "use = '
+ "use = '🖼️image_encoding from image' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']\n",
  "# @markdown --------------------------\n",
  "use_token_padding = True # param {type:\"boolean\"} <---- Enabled by default\n",
  "prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
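The use toggle added above picks whether the interrogator's reference vector comes from a text encoding of the prompt or an image encoding. The encoder calls themselves sit outside this hunk, so the following is only a rough sketch with the stock transformers CLIP classes, not the notebook's actual code:

import requests
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

use = '🖼️image_encoding from image'
if use == '📝text_encoding from prompt':
    inputs = processor(text=["photo of a banana"], return_tensors="pt", padding=True)
    ref = model.get_text_features(**inputs)
else:
    url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # test image from the next hunk
    image = Image.open(requests.get(url, stream=True).raw)
    inputs = processor(images=image, return_tensors="pt")
    ref = model.get_image_features(**inputs)

print(ref.shape)  # torch.Size([1, 768]) projection for ViT-L/14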
@@ -319,7 +317,7 @@
  " return list(uploaded.keys())\n",
  "#Get image\n",
  "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
- "image_url = \"
+ "image_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
  "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
  "# @markdown --------------------------\n",
  "from PIL import Image\n",
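Per the placeholders above, image_url is fetched directly while colab_image_path is resolved against /content/sd_tokens/, and leaving both empty falls back to the local-upload widget. A hypothetical resolution sketch; the notebook's own branching is not shown in this hunk:

import requests
from PIL import Image

image_url = ""         # e.g. the coco test URL from the comment above
colab_image_path = ""  # e.g. "my_upload.png", read as '/content/sd_tokens/' + your input

if image_url.strip():
    image = Image.open(requests.get(image_url, stream=True).raw)
elif colab_image_path.strip():
    image = Image.open('/content/sd_tokens/' + colab_image_path)
else:
    image = None  # fall back to the upload handler defined earlier in the cell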
@@ -368,7 +366,7 @@
  "# @markdown -----\n",
  "# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
  "start_search_at_index = 1700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
- "# @markdown The lower the start_index, the more similiar the sampled tokens will be to the
+ "# @markdown The lower the start_index, the more similiar the sampled tokens will be to the target token assigned in the '⚡ Get similiar tokens' cell\"\n",
  "start_search_at_ID = start_search_at_index\n",
  "search_range = 100 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
  "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
@@ -567,7 +565,22 @@
  ],
  "metadata": {
  "collapsed": true,
- "id": "fi0jRruI0-tu"
+ "id": "fi0jRruI0-tu",
+ "cellView": "form"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title (Optional) ⚡Actively set which Vocab list to use for the interrogator\n",
+ "token_name = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a token_name used earlier\"}\n",
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_' + token_name.replace('</w>','').strip()\n",
+ "print(f'Using a vocab ordered to most similiar to the token {token_name}')"
+ ],
+ "metadata": {
+ "id": "FYa96UCQuE1U"
  },
  "execution_count": null,
  "outputs": []
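The optional cell added at the end re-targets the interrogator by rebuilding VOCAB_FILENAME from a token_name used earlier. A hypothetical guard (the glob check and the messages are assumptions, not part of the commit) to confirm the matching .db exists in sd_tokens before switching:

import glob

token_name = "banana"  # a token_name used earlier in the '⚡ Get similiar tokens' cell
VOCAB_FILENAME = 'tokens_most_similiar_to_' + token_name.replace('</w>', '').strip()

# shelve may add a backend-specific suffix (.db, .dat, ...), so match on the prefix.
if glob.glob(VOCAB_FILENAME + '*'):
    print(f"Vocab ordered by similarity to '{token_name}' found -- the interrogator will use it")
else:
    print(f"No stored vocab for '{token_name}' -- run the '⚡ Get similiar tokens' cell first")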
|
|