File size: 2,647 Bytes
44eca84 f22aece 44eca84 8ad9fee 44eca84 8ad9fee 44eca84 8ad9fee 44eca84 8ad9fee 44eca84 8ad9fee 44eca84 8ad9fee 44eca84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"# Load the tokens into the colab\n",
"import os\n",
"\n",
"import torch\n",
"from torch import linalg as LA\n",
"\n",
"TOKENS_DIR = '/content/sd_tokens'\n",
"\n",
"# Clone only when the checkout is missing: this cell ends inside TOKENS_DIR, so an\n",
"# unconditional clone on re-run would create a nested copy of the dataset.\n",
"if not os.path.isdir(TOKENS_DIR):\n",
"    !git clone https://huggingface.co/datasets/codeShare/sd_tokens {TOKENS_DIR}\n",
"\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"%cd {TOKENS_DIR}\n",
"# map_location places the loaded tensors on the GPU when one is available, else on the CPU\n",
"token = torch.load('sd15_tensors.pt', map_location=device, weights_only=True)"
],
"metadata": {
"id": "Ch9puvwKH1s3"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Print the dimensions of a single entry of `token` (ID 100 is used as the sample)\n",
"print(token[100].size())"
],
"metadata": {
"id": "S_Yh9gH_OUA1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def absolute_value(x):\n",
"    \"\"\"Return the magnitude of x.\n",
"\n",
"    Uses the built-in abs(), which also maps -0.0 to 0.0 (max(x, -x) returned -0.0\n",
"    for that input, which would be displayed with a minus sign).\n",
"    \"\"\"\n",
"    return abs(x)\n",
"\n",
"def similarity(id_A , id_B, tokens=None):\n",
"    \"\"\"Return the absolute cosine similarity of two token vectors as a percentage string.\n",
"\n",
"    Args:\n",
"        id_A: Index of the first vector.\n",
"        id_B: Index of the second vector.\n",
"        tokens: Optional indexable collection of 1-D tensors to look the IDs up in;\n",
"            defaults to the notebook-level `token` object.\n",
"\n",
"    Returns:\n",
"        A string such as '12.345 %', i.e. |cos(A, B)| * 100 rounded to three decimals.\n",
"        Returns '0.0 %' when either vector has zero length, where the cosine is\n",
"        undefined and the plain formula would produce 'nan %'.\n",
"    \"\"\"\n",
"    if tokens is None:\n",
"        tokens = token  # fall back to the tensors loaded earlier in the notebook\n",
"\n",
"    vec_a = tokens[id_A]\n",
"    vec_b = tokens[id_B]\n",
"\n",
"    # Product of the Euclidean (L2) lengths, each being (a^2 + b^2 + ...)^(1/2)\n",
"    norm_product = LA.vector_norm(vec_a, ord=2) * LA.vector_norm(vec_b, ord=2)\n",
"    if norm_product.item() == 0:\n",
"        # Guard against 0/0 -> nan for a zero-length vector\n",
"        return '0.0 %'\n",
"\n",
"    cosine = torch.dot(vec_a, vec_b) / norm_product\n",
"\n",
"    # The sign is dropped on purpose: only the strength of the alignment is reported\n",
"    similarity_pcnt = abs(cosine.item() * 100)\n",
"    similarity_pcnt_aprox = round(similarity_pcnt, 3)\n",
"\n",
"    return f'{similarity_pcnt_aprox} %'"
],
"metadata": {
"id": "fxquCxFaUxAZ"
},
"execution_count": 16,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"Valid IDs for id_for_token_A / id_for_token_B range from 0 to 49407."
],
"metadata": {
"id": "kX72bAuhOtlT"
}
},
{
"cell_type": "code",
"source": [
"id_for_token_A = 4567 # @param {type:'integer'}\n",
"id_for_token_B = 4343 # @param {type:'integer'}\n",
"\n",
"# The IDs are used as tensor indices, so the form fields above only accept integers\n",
"similarity_str = f'The similarity between tokens A and B is {similarity(id_for_token_A, id_for_token_B)}'\n",
"\n",
"print(similarity_str)"
],
"metadata": {
"id": "MwmOdC9cNZty"
},
"execution_count": null,
"outputs": []
}
]
} |