Upload sd_token_similarity_calculator.ipynb
Browse files
sd_token_similarity_calculator.ipynb
CHANGED
@@ -78,8 +78,8 @@
|
|
78 |
{
|
79 |
"cell_type": "code",
|
80 |
"source": [
|
81 |
-
"print(vocab[
|
82 |
-
"print(token[
|
83 |
],
|
84 |
"metadata": {
|
85 |
"id": "S_Yh9gH_OUA1"
|
@@ -104,7 +104,7 @@
|
|
104 |
"\n",
|
105 |
"from transformers import AutoTokenizer\n",
|
106 |
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
|
107 |
-
"prompt= \"
|
108 |
"tokenizer_output = tokenizer(text = prompt)\n",
|
109 |
"input_ids = tokenizer_output['input_ids']\n",
|
110 |
"print(input_ids)"
|
@@ -135,7 +135,7 @@
|
|
135 |
"\n",
|
136 |
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
|
137 |
"#----#\n",
|
138 |
-
"print(f'Calculated all cosine-similarities between ID = {id_A} the rest of the
|
139 |
"print(f'Calculated indices as a 1x{indices.shape[0]} tensor')"
|
140 |
],
|
141 |
"metadata": {
|
@@ -149,8 +149,9 @@
|
|
149 |
"source": [
|
150 |
"list_size = 10 # @param {type:'number'}\n",
|
151 |
"for index in range(list_size):\n",
|
152 |
-
"
|
153 |
-
" print(f'
|
|
|
154 |
" print(f'similiarity = {round(sorted[index].item()*100,2)} %') # % value\n",
|
155 |
" print('--------')\n"
|
156 |
],
|
|
|
78 |
{
|
79 |
"cell_type": "code",
|
80 |
"source": [
|
81 |
+
"print(vocab[8922]) #the vocab item for ID 8922\n",
|
82 |
+
"print(token[8922].shape) #dimension of the token"
|
83 |
],
|
84 |
"metadata": {
|
85 |
"id": "S_Yh9gH_OUA1"
|
|
|
104 |
"\n",
|
105 |
"from transformers import AutoTokenizer\n",
|
106 |
"tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
|
107 |
+
"prompt= \"banana\" # @param {type:'string'}\n",
|
108 |
"tokenizer_output = tokenizer(text = prompt)\n",
|
109 |
"input_ids = tokenizer_output['input_ids']\n",
|
110 |
"print(input_ids)"
|
|
|
135 |
"\n",
|
136 |
"sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
|
137 |
"#----#\n",
|
138 |
+
"print(f'Calculated all cosine-similarities between the token {vocab[id_A]} with ID = {id_A} the rest of the {NUM_TOKENS} tokens as a 1x{sorted.shape[0]} tensor')\n",
|
139 |
"print(f'Calculated indices as a 1x{indices.shape[0]} tensor')"
|
140 |
],
|
141 |
"metadata": {
|
|
|
149 |
"source": [
|
150 |
"list_size = 10 # @param {type:'number'}\n",
|
151 |
"for index in range(list_size):\n",
|
152 |
+
" id = indices[index].item()\n",
|
153 |
+
" print(f'{vocab[id]}') # vocab item\n",
|
154 |
+
" print(f'ID = {id}') # IDs\n",
|
155 |
" print(f'similiarity = {round(sorted[index].item()*100,2)} %') # % value\n",
|
156 |
" print('--------')\n"
|
157 |
],
|