Geohunterr commited on
Commit
56ca19e
·
verified ·
1 Parent(s): a7c9b56

Training in progress, epoch 0

Browse files
.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
Logs/events.out.tfevents.1718300677.78fe09153f4a.177.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39f11fe12b3d9dc1241eccafcfd0b589861d6e0aab1168f81b23776b49db392
3
+ size 6094
Untitled.ipynb ADDED
@@ -0,0 +1,1224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "76787265-e5ef-4dc7-9547-7e215461ba65",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "Collecting transformers==4.41.0\n",
14
+ " Downloading transformers-4.41.0-py3-none-any.whl.metadata (43 kB)\n",
15
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.8/43.8 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
16
+ "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.41.0) (3.9.0)\n",
17
+ "Collecting huggingface-hub<1.0,>=0.23.0 (from transformers==4.41.0)\n",
18
+ " Downloading huggingface_hub-0.23.3-py3-none-any.whl.metadata (12 kB)\n",
19
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.41.0) (1.24.1)\n",
20
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.41.0) (23.2)\n",
21
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.41.0) (6.0.1)\n",
22
+ "Collecting regex!=2019.12.17 (from transformers==4.41.0)\n",
23
+ " Downloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
24
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
25
+ "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.41.0) (2.31.0)\n",
26
+ "Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.0)\n",
27
+ " Downloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
28
+ "Collecting safetensors>=0.4.1 (from transformers==4.41.0)\n",
29
+ " Downloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
30
+ "Collecting tqdm>=4.27 (from transformers==4.41.0)\n",
31
+ " Downloading tqdm-4.66.4-py3-none-any.whl.metadata (57 kB)\n",
32
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
33
+ "\u001b[?25hCollecting fsspec>=2023.5.0 (from huggingface-hub<1.0,>=0.23.0->transformers==4.41.0)\n",
34
+ " Downloading fsspec-2024.6.0-py3-none-any.whl.metadata (11 kB)\n",
35
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers==4.41.0) (4.4.0)\n",
36
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.41.0) (2.1.1)\n",
37
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.41.0) (3.4)\n",
38
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.41.0) (1.26.13)\n",
39
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.41.0) (2022.12.7)\n",
40
+ "Downloading transformers-4.41.0-py3-none-any.whl (9.1 MB)\n",
41
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.1/9.1 MB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
42
+ "\u001b[?25hDownloading huggingface_hub-0.23.3-py3-none-any.whl (401 kB)\n",
43
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m401.7/401.7 kB\u001b[0m \u001b[31m63.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
44
+ "\u001b[?25hDownloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)\n",
45
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m775.1/775.1 kB\u001b[0m \u001b[31m56.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
46
+ "\u001b[?25hDownloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
47
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m60.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
48
+ "\u001b[?25hDownloading tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
49
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
50
+ "\u001b[?25hDownloading tqdm-4.66.4-py3-none-any.whl (78 kB)\n",
51
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
52
+ "\u001b[?25hDownloading fsspec-2024.6.0-py3-none-any.whl (176 kB)\n",
53
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.9/176.9 kB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
54
+ "\u001b[?25hInstalling collected packages: tqdm, safetensors, regex, fsspec, huggingface-hub, tokenizers, transformers\n",
55
+ " Attempting uninstall: fsspec\n",
56
+ " Found existing installation: fsspec 2023.4.0\n",
57
+ " Uninstalling fsspec-2023.4.0:\n",
58
+ " Successfully uninstalled fsspec-2023.4.0\n",
59
+ "Successfully installed fsspec-2024.6.0 huggingface-hub-0.23.3 regex-2024.5.15 safetensors-0.4.3 tokenizers-0.19.1 tqdm-4.66.4 transformers-4.41.0\n",
60
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
61
+ "\u001b[0m\n",
62
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
63
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
64
+ ]
65
+ }
66
+ ],
67
+ "source": [
68
+ "!pip install transformers==4.41.0"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "execution_count": 2,
74
+ "id": "daa5e231-e5de-4beb-b303-323d017e58e3",
75
+ "metadata": {},
76
+ "outputs": [
77
+ {
78
+ "name": "stdout",
79
+ "output_type": "stream",
80
+ "text": [
81
+ "Collecting datasets==2.19.1\n",
82
+ " Downloading datasets-2.19.1-py3-none-any.whl.metadata (19 kB)\n",
83
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (3.9.0)\n",
84
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (1.24.1)\n",
85
+ "Collecting pyarrow>=12.0.0 (from datasets==2.19.1)\n",
86
+ " Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.0 kB)\n",
87
+ "Collecting pyarrow-hotfix (from datasets==2.19.1)\n",
88
+ " Downloading pyarrow_hotfix-0.6-py3-none-any.whl.metadata (3.6 kB)\n",
89
+ "Collecting dill<0.3.9,>=0.3.0 (from datasets==2.19.1)\n",
90
+ " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
91
+ "Collecting pandas (from datasets==2.19.1)\n",
92
+ " Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n",
93
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (2.31.0)\n",
94
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (4.66.4)\n",
95
+ "Collecting xxhash (from datasets==2.19.1)\n",
96
+ " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
97
+ "Collecting multiprocess (from datasets==2.19.1)\n",
98
+ " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
99
+ "Collecting fsspec<=2024.3.1,>=2023.1.0 (from fsspec[http]<=2024.3.1,>=2023.1.0->datasets==2.19.1)\n",
100
+ " Downloading fsspec-2024.3.1-py3-none-any.whl.metadata (6.8 kB)\n",
101
+ "Collecting aiohttp (from datasets==2.19.1)\n",
102
+ " Downloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.5 kB)\n",
103
+ "Requirement already satisfied: huggingface-hub>=0.21.2 in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (0.23.3)\n",
104
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (23.2)\n",
105
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets==2.19.1) (6.0.1)\n",
106
+ "Collecting aiosignal>=1.1.2 (from aiohttp->datasets==2.19.1)\n",
107
+ " Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)\n",
108
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==2.19.1) (23.1.0)\n",
109
+ "Collecting frozenlist>=1.1.1 (from aiohttp->datasets==2.19.1)\n",
110
+ " Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
111
+ "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets==2.19.1)\n",
112
+ " Downloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.2 kB)\n",
113
+ "Collecting yarl<2.0,>=1.0 (from aiohttp->datasets==2.19.1)\n",
114
+ " Downloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (31 kB)\n",
115
+ "Collecting async-timeout<5.0,>=4.0 (from aiohttp->datasets==2.19.1)\n",
116
+ " Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\n",
117
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.2->datasets==2.19.1) (4.4.0)\n",
118
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.19.1) (2.1.1)\n",
119
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.19.1) (3.4)\n",
120
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.19.1) (1.26.13)\n",
121
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets==2.19.1) (2022.12.7)\n",
122
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets==2.19.1) (2.8.2)\n",
123
+ "Collecting pytz>=2020.1 (from pandas->datasets==2.19.1)\n",
124
+ " Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)\n",
125
+ "Collecting tzdata>=2022.7 (from pandas->datasets==2.19.1)\n",
126
+ " Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
127
+ "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas->datasets==2.19.1) (1.16.0)\n",
128
+ "Downloading datasets-2.19.1-py3-none-any.whl (542 kB)\n",
129
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m542.0/542.0 kB\u001b[0m \u001b[31m23.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
130
+ "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
131
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m30.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
132
+ "\u001b[?25hDownloading fsspec-2024.3.1-py3-none-any.whl (171 kB)\n",
133
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m172.0/172.0 kB\u001b[0m \u001b[31m39.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
134
+ "\u001b[?25hDownloading aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
135
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m47.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
136
+ "\u001b[?25hDownloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)\n",
137
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.8/40.8 MB\u001b[0m \u001b[31m73.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
138
+ "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
139
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m33.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
140
+ "\u001b[?25hDownloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)\n",
141
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.0/13.0 MB\u001b[0m \u001b[31m90.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
142
+ "\u001b[?25hDownloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)\n",
143
+ "Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
144
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m48.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
145
+ "\u001b[?25hDownloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
146
+ "Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\n",
147
+ "Downloading frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (239 kB)\n",
148
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m239.5/239.5 kB\u001b[0m \u001b[31m51.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
149
+ "\u001b[?25hDownloading multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (124 kB)\n",
150
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.3/124.3 kB\u001b[0m \u001b[31m34.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
151
+ "\u001b[?25hDownloading pytz-2024.1-py2.py3-none-any.whl (505 kB)\n",
152
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m505.5/505.5 kB\u001b[0m \u001b[31m69.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
153
+ "\u001b[?25hDownloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n",
154
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m53.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
155
+ "\u001b[?25hDownloading yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (301 kB)\n",
156
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m301.6/301.6 kB\u001b[0m \u001b[31m53.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
157
+ "\u001b[?25hInstalling collected packages: pytz, xxhash, tzdata, pyarrow-hotfix, pyarrow, multidict, fsspec, frozenlist, dill, async-timeout, yarl, pandas, multiprocess, aiosignal, aiohttp, datasets\n",
158
+ " Attempting uninstall: fsspec\n",
159
+ " Found existing installation: fsspec 2024.6.0\n",
160
+ " Uninstalling fsspec-2024.6.0:\n",
161
+ " Successfully uninstalled fsspec-2024.6.0\n",
162
+ "Successfully installed aiohttp-3.9.5 aiosignal-1.3.1 async-timeout-4.0.3 datasets-2.19.1 dill-0.3.8 frozenlist-1.4.1 fsspec-2024.3.1 multidict-6.0.5 multiprocess-0.70.16 pandas-2.2.2 pyarrow-16.1.0 pyarrow-hotfix-0.6 pytz-2024.1 tzdata-2024.1 xxhash-3.4.1 yarl-1.9.4\n",
163
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
164
+ "\u001b[0m\n",
165
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
166
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
167
+ ]
168
+ }
169
+ ],
170
+ "source": [
171
+ "!pip install datasets==2.19.1"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 3,
177
+ "id": "01121bd9-fb04-4cbe-97e4-cc6d92be17f5",
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stdout",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "Collecting Pillow==10.3.0\n",
185
+ " Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (9.2 kB)\n",
186
+ "Downloading pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (4.5 MB)\n",
187
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m30.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
188
+ "\u001b[?25hInstalling collected packages: Pillow\n",
189
+ " Attempting uninstall: Pillow\n",
190
+ " Found existing installation: Pillow 9.3.0\n",
191
+ " Uninstalling Pillow-9.3.0:\n",
192
+ " Successfully uninstalled Pillow-9.3.0\n",
193
+ "Successfully installed Pillow-10.3.0\n",
194
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
195
+ "\u001b[0m\n",
196
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
197
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
198
+ ]
199
+ }
200
+ ],
201
+ "source": [
202
+ "!pip install Pillow==10.3.0"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "code",
207
+ "execution_count": 4,
208
+ "id": "f33c4142-9b1d-40b3-b26c-557f922285f8",
209
+ "metadata": {},
210
+ "outputs": [
211
+ {
212
+ "name": "stdout",
213
+ "output_type": "stream",
214
+ "text": [
215
+ "Collecting peft==0.11.1\n",
216
+ " Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)\n",
217
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (1.24.1)\n",
218
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (23.2)\n",
219
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (5.9.6)\n",
220
+ "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (6.0.1)\n",
221
+ "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (2.1.0+cu118)\n",
222
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (4.41.0)\n",
223
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (4.66.4)\n",
224
+ "Collecting accelerate>=0.21.0 (from peft==0.11.1)\n",
225
+ " Downloading accelerate-0.31.0-py3-none-any.whl.metadata (19 kB)\n",
226
+ "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (0.4.3)\n",
227
+ "Requirement already satisfied: huggingface-hub>=0.17.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.11.1) (0.23.3)\n",
228
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.11.1) (3.9.0)\n",
229
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.11.1) (2024.3.1)\n",
230
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.11.1) (2.31.0)\n",
231
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.11.1) (4.4.0)\n",
232
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.11.1) (1.12)\n",
233
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.11.1) (3.0)\n",
234
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.11.1) (3.1.2)\n",
235
+ "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.11.1) (2.1.0)\n",
236
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.11.1) (2024.5.15)\n",
237
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.11.1) (0.19.1)\n",
238
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft==0.11.1) (2.1.2)\n",
239
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.11.1) (2.1.1)\n",
240
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.11.1) (3.4)\n",
241
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.11.1) (1.26.13)\n",
242
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.11.1) (2022.12.7)\n",
243
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft==0.11.1) (1.3.0)\n",
244
+ "Downloading peft-0.11.1-py3-none-any.whl (251 kB)\n",
245
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.6/251.6 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
246
+ "\u001b[?25hDownloading accelerate-0.31.0-py3-none-any.whl (309 kB)\n",
247
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m309.4/309.4 kB\u001b[0m \u001b[31m52.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
248
+ "\u001b[?25hInstalling collected packages: accelerate, peft\n",
249
+ "Successfully installed accelerate-0.31.0 peft-0.11.1\n",
250
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
251
+ "\u001b[0m\n",
252
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
253
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
254
+ ]
255
+ }
256
+ ],
257
+ "source": [
258
+ "!pip install peft==0.11.1"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": 5,
264
+ "id": "39fae8b0-ab30-423f-b0cb-27733459d7c5",
265
+ "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "name": "stdout",
269
+ "output_type": "stream",
270
+ "text": [
271
+ "Collecting bitsandbytes==0.43.1\n",
272
+ " Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)\n",
273
+ "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from bitsandbytes==0.43.1) (2.1.0+cu118)\n",
274
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from bitsandbytes==0.43.1) (1.24.1)\n",
275
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (3.9.0)\n",
276
+ "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (4.4.0)\n",
277
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (1.12)\n",
278
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (3.0)\n",
279
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (3.1.2)\n",
280
+ "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (2024.3.1)\n",
281
+ "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch->bitsandbytes==0.43.1) (2.1.0)\n",
282
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->bitsandbytes==0.43.1) (2.1.2)\n",
283
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->bitsandbytes==0.43.1) (1.3.0)\n",
284
+ "Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)\n",
285
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.8/119.8 MB\u001b[0m \u001b[31m48.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
286
+ "\u001b[?25hInstalling collected packages: bitsandbytes\n",
287
+ "Successfully installed bitsandbytes-0.43.1\n",
288
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
289
+ "\u001b[0m\n",
290
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
291
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
292
+ ]
293
+ }
294
+ ],
295
+ "source": [
296
+ "!pip install bitsandbytes==0.43.1"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 6,
302
+ "id": "911781be-5a84-4d24-999d-10a420a90cbb",
303
+ "metadata": {},
304
+ "outputs": [
305
+ {
306
+ "name": "stdout",
307
+ "output_type": "stream",
308
+ "text": [
309
+ "Collecting kagglehub==0.2.5\n",
310
+ " Downloading kagglehub-0.2.5-py3-none-any.whl.metadata (18 kB)\n",
311
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from kagglehub==0.2.5) (23.2)\n",
312
+ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from kagglehub==0.2.5) (2.31.0)\n",
313
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from kagglehub==0.2.5) (4.66.4)\n",
314
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub==0.2.5) (2.1.1)\n",
315
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub==0.2.5) (3.4)\n",
316
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub==0.2.5) (1.26.13)\n",
317
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->kagglehub==0.2.5) (2022.12.7)\n",
318
+ "Downloading kagglehub-0.2.5-py3-none-any.whl (34 kB)\n",
319
+ "Installing collected packages: kagglehub\n",
320
+ "Successfully installed kagglehub-0.2.5\n",
321
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
322
+ "\u001b[0m\n",
323
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
324
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
325
+ ]
326
+ }
327
+ ],
328
+ "source": [
329
+ "!pip install kagglehub==0.2.5"
330
+ ]
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": 7,
335
+ "id": "4c4384b6-90ed-4746-a658-aa3e0e525c84",
336
+ "metadata": {},
337
+ "outputs": [
338
+ {
339
+ "name": "stdout",
340
+ "output_type": "stream",
341
+ "text": [
342
+ "Collecting tensorboard\n",
343
+ " Downloading tensorboard-2.17.0-py3-none-any.whl.metadata (1.6 kB)\n",
344
+ "Collecting absl-py>=0.4 (from tensorboard)\n",
345
+ " Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)\n",
346
+ "Collecting grpcio>=1.48.2 (from tensorboard)\n",
347
+ " Downloading grpcio-1.64.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.3 kB)\n",
348
+ "Collecting markdown>=2.6.8 (from tensorboard)\n",
349
+ " Downloading Markdown-3.6-py3-none-any.whl.metadata (7.0 kB)\n",
350
+ "Requirement already satisfied: numpy>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (1.24.1)\n",
351
+ "Collecting protobuf!=4.24.0,<5.0.0,>=3.19.6 (from tensorboard)\n",
352
+ " Downloading protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n",
353
+ "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard) (68.2.2)\n",
354
+ "Requirement already satisfied: six>1.9 in /usr/lib/python3/dist-packages (from tensorboard) (1.16.0)\n",
355
+ "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)\n",
356
+ " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n",
357
+ "Collecting werkzeug>=1.0.1 (from tensorboard)\n",
358
+ " Downloading werkzeug-3.0.3-py3-none-any.whl.metadata (3.7 kB)\n",
359
+ "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard) (2.1.2)\n",
360
+ "Downloading tensorboard-2.17.0-py3-none-any.whl (5.5 MB)\n",
361
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m53.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
362
+ "\u001b[?25hDownloading absl_py-2.1.0-py3-none-any.whl (133 kB)\n",
363
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m133.7/133.7 kB\u001b[0m \u001b[31m35.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
364
+ "\u001b[?25hDownloading grpcio-1.64.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.6 MB)\n",
365
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m84.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
366
+ "\u001b[?25hDownloading Markdown-3.6-py3-none-any.whl (105 kB)\n",
367
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.4/105.4 kB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
368
+ "\u001b[?25hDownloading protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n",
369
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.6/294.6 kB\u001b[0m \u001b[31m54.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
370
+ "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n",
371
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m83.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n",
372
+ "\u001b[?25hDownloading werkzeug-3.0.3-py3-none-any.whl (227 kB)\n",
373
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.3/227.3 kB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
374
+ "\u001b[?25hInstalling collected packages: werkzeug, tensorboard-data-server, protobuf, markdown, grpcio, absl-py, tensorboard\n",
375
+ "Successfully installed absl-py-2.1.0 grpcio-1.64.1 markdown-3.6 protobuf-4.25.3 tensorboard-2.17.0 tensorboard-data-server-0.7.2 werkzeug-3.0.3\n",
376
+ "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
377
+ "\u001b[0m\n",
378
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
379
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n"
380
+ ]
381
+ }
382
+ ],
383
+ "source": [
384
+ "!pip install tensorboard"
385
+ ]
386
+ },
387
+ {
388
+ "cell_type": "code",
389
+ "execution_count": 8,
390
+ "id": "d66b20ef-657a-4020-a78c-928a5f11ec49",
391
+ "metadata": {},
392
+ "outputs": [
393
+ {
394
+ "data": {
395
+ "application/vnd.jupyter.widget-view+json": {
396
+ "model_id": "d0eac475d4d34dbc85acf125cfce2fdb",
397
+ "version_major": 2,
398
+ "version_minor": 0
399
+ },
400
+ "text/plain": [
401
+ "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
402
+ ]
403
+ },
404
+ "metadata": {},
405
+ "output_type": "display_data"
406
+ }
407
+ ],
408
+ "source": [
409
+ "from huggingface_hub import notebook_login\n",
410
+ "notebook_login()"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "execution_count": null,
416
+ "id": "4abc6f1b-fd34-4ff8-a3d8-4b4aa8d93215",
417
+ "metadata": {},
418
+ "outputs": [
419
+ {
420
+ "name": "stdout",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "Current Directory: /workspace\n"
424
+ ]
425
+ },
426
+ {
427
+ "data": {
428
+ "application/vnd.jupyter.widget-view+json": {
429
+ "model_id": "a8e52aeb8c194ca185a59f15c1b26533",
430
+ "version_major": 2,
431
+ "version_minor": 0
432
+ },
433
+ "text/plain": [
434
+ "Downloading builder script: 0%| | 0.00/5.50k [00:00<?, ?B/s]"
435
+ ]
436
+ },
437
+ "metadata": {},
438
+ "output_type": "display_data"
439
+ },
440
+ {
441
+ "data": {
442
+ "application/vnd.jupyter.widget-view+json": {
443
+ "model_id": "cbdcc8000a3f4d72a88dcd8a7d317714",
444
+ "version_major": 2,
445
+ "version_minor": 0
446
+ },
447
+ "text/plain": [
448
+ "Downloading readme: 0%| | 0.00/2.86k [00:00<?, ?B/s]"
449
+ ]
450
+ },
451
+ "metadata": {},
452
+ "output_type": "display_data"
453
+ },
454
+ {
455
+ "data": {
456
+ "application/vnd.jupyter.widget-view+json": {
457
+ "model_id": "45b9006f0aac4f859bce3323b2369649",
458
+ "version_major": 2,
459
+ "version_minor": 0
460
+ },
461
+ "text/plain": [
462
+ "Downloading data: 0%| | 0.00/142k [00:00<?, ?B/s]"
463
+ ]
464
+ },
465
+ "metadata": {},
466
+ "output_type": "display_data"
467
+ },
468
+ {
469
+ "data": {
470
+ "application/vnd.jupyter.widget-view+json": {
471
+ "model_id": "2d728d1733054affb2b4425c1c8a5d02",
472
+ "version_major": 2,
473
+ "version_minor": 0
474
+ },
475
+ "text/plain": [
476
+ "Downloading data: 0%| | 0.00/18.3k [00:00<?, ?B/s]"
477
+ ]
478
+ },
479
+ "metadata": {},
480
+ "output_type": "display_data"
481
+ },
482
+ {
483
+ "data": {
484
+ "application/vnd.jupyter.widget-view+json": {
485
+ "model_id": "f9efe2f727a3455f9a1c334ec6f9ff1e",
486
+ "version_major": 2,
487
+ "version_minor": 0
488
+ },
489
+ "text/plain": [
490
+ "Downloading data: 0%| | 0.00/18.3k [00:00<?, ?B/s]"
491
+ ]
492
+ },
493
+ "metadata": {},
494
+ "output_type": "display_data"
495
+ },
496
+ {
497
+ "data": {
498
+ "application/vnd.jupyter.widget-view+json": {
499
+ "model_id": "4870f8ce6666483887433632d4a66dc7",
500
+ "version_major": 2,
501
+ "version_minor": 0
502
+ },
503
+ "text/plain": [
504
+ "Downloading data: 0%| | 0.00/603k [00:00<?, ?B/s]"
505
+ ]
506
+ },
507
+ "metadata": {},
508
+ "output_type": "display_data"
509
+ },
510
+ {
511
+ "data": {
512
+ "application/vnd.jupyter.widget-view+json": {
513
+ "model_id": "c5d6b3731cee4b32a03ab3684a35eda5",
514
+ "version_major": 2,
515
+ "version_minor": 0
516
+ },
517
+ "text/plain": [
518
+ "Downloading data: 0%| | 0.00/69.7k [00:00<?, ?B/s]"
519
+ ]
520
+ },
521
+ "metadata": {},
522
+ "output_type": "display_data"
523
+ },
524
+ {
525
+ "data": {
526
+ "application/vnd.jupyter.widget-view+json": {
527
+ "model_id": "3bdaaf5cbe2e42749167b87bee019fad",
528
+ "version_major": 2,
529
+ "version_minor": 0
530
+ },
531
+ "text/plain": [
532
+ "Downloading data: 0%| | 0.00/77.8k [00:00<?, ?B/s]"
533
+ ]
534
+ },
535
+ "metadata": {},
536
+ "output_type": "display_data"
537
+ },
538
+ {
539
+ "data": {
540
+ "application/vnd.jupyter.widget-view+json": {
541
+ "model_id": "c21354d509d54d8188bef922b8ef17a5",
542
+ "version_major": 2,
543
+ "version_minor": 0
544
+ },
545
+ "text/plain": [
546
+ "Downloading data: 0%| | 0.00/3.77G [00:00<?, ?B/s]"
547
+ ]
548
+ },
549
+ "metadata": {},
550
+ "output_type": "display_data"
551
+ },
552
+ {
553
+ "data": {
554
+ "application/vnd.jupyter.widget-view+json": {
555
+ "model_id": "aa7f2bcdb6564170abf1cb6134b58045",
556
+ "version_major": 2,
557
+ "version_minor": 0
558
+ },
559
+ "text/plain": [
560
+ "Downloading data: 0%| | 0.00/439M [00:00<?, ?B/s]"
561
+ ]
562
+ },
563
+ "metadata": {},
564
+ "output_type": "display_data"
565
+ },
566
+ {
567
+ "data": {
568
+ "application/vnd.jupyter.widget-view+json": {
569
+ "model_id": "f9e93d09af034b3eac24b9d5d01b5841",
570
+ "version_major": 2,
571
+ "version_minor": 0
572
+ },
573
+ "text/plain": [
574
+ "Downloading data: 0%| | 0.00/463M [00:00<?, ?B/s]"
575
+ ]
576
+ },
577
+ "metadata": {},
578
+ "output_type": "display_data"
579
+ },
580
+ {
581
+ "data": {
582
+ "application/vnd.jupyter.widget-view+json": {
583
+ "model_id": "a478ae928cee4fb7b838344ff1088985",
584
+ "version_major": 2,
585
+ "version_minor": 0
586
+ },
587
+ "text/plain": [
588
+ "Generating train split: 0 examples [00:00, ? examples/s]"
589
+ ]
590
+ },
591
+ "metadata": {},
592
+ "output_type": "display_data"
593
+ },
594
+ {
595
+ "data": {
596
+ "application/vnd.jupyter.widget-view+json": {
597
+ "model_id": "a6228e491587454d87765b8c295d589a",
598
+ "version_major": 2,
599
+ "version_minor": 0
600
+ },
601
+ "text/plain": [
602
+ "Generating validation split: 0 examples [00:00, ? examples/s]"
603
+ ]
604
+ },
605
+ "metadata": {},
606
+ "output_type": "display_data"
607
+ },
608
+ {
609
+ "data": {
610
+ "application/vnd.jupyter.widget-view+json": {
611
+ "model_id": "1a61981eca1742d6a1818cf58b972670",
612
+ "version_major": 2,
613
+ "version_minor": 0
614
+ },
615
+ "text/plain": [
616
+ "Generating test split: 0 examples [00:00, ? examples/s]"
617
+ ]
618
+ },
619
+ "metadata": {},
620
+ "output_type": "display_data"
621
+ },
622
+ {
623
+ "data": {
624
+ "application/vnd.jupyter.widget-view+json": {
625
+ "model_id": "e72d3081316849858b54223c7f6ff146",
626
+ "version_major": 2,
627
+ "version_minor": 0
628
+ },
629
+ "text/plain": [
630
+ "preprocessor_config.json: 0%| | 0.00/699 [00:00<?, ?B/s]"
631
+ ]
632
+ },
633
+ "metadata": {},
634
+ "output_type": "display_data"
635
+ },
636
+ {
637
+ "data": {
638
+ "application/vnd.jupyter.widget-view+json": {
639
+ "model_id": "83e75fd2f7184ec682d2d836f903ac46",
640
+ "version_major": 2,
641
+ "version_minor": 0
642
+ },
643
+ "text/plain": [
644
+ "tokenizer_config.json: 0%| | 0.00/40.0k [00:00<?, ?B/s]"
645
+ ]
646
+ },
647
+ "metadata": {},
648
+ "output_type": "display_data"
649
+ },
650
+ {
651
+ "data": {
652
+ "application/vnd.jupyter.widget-view+json": {
653
+ "model_id": "6acf7068f1274c5588e273aa48569bd0",
654
+ "version_major": 2,
655
+ "version_minor": 0
656
+ },
657
+ "text/plain": [
658
+ "tokenizer.model: 0%| | 0.00/4.26M [00:00<?, ?B/s]"
659
+ ]
660
+ },
661
+ "metadata": {},
662
+ "output_type": "display_data"
663
+ },
664
+ {
665
+ "data": {
666
+ "application/vnd.jupyter.widget-view+json": {
667
+ "model_id": "9888d640db1d460e9d61a820a3a8adcc",
668
+ "version_major": 2,
669
+ "version_minor": 0
670
+ },
671
+ "text/plain": [
672
+ "tokenizer.json: 0%| | 0.00/17.5M [00:00<?, ?B/s]"
673
+ ]
674
+ },
675
+ "metadata": {},
676
+ "output_type": "display_data"
677
+ },
678
+ {
679
+ "data": {
680
+ "application/vnd.jupyter.widget-view+json": {
681
+ "model_id": "21d53ace6e25495890222c17e261b35e",
682
+ "version_major": 2,
683
+ "version_minor": 0
684
+ },
685
+ "text/plain": [
686
+ "added_tokens.json: 0%| | 0.00/24.0 [00:00<?, ?B/s]"
687
+ ]
688
+ },
689
+ "metadata": {},
690
+ "output_type": "display_data"
691
+ },
692
+ {
693
+ "data": {
694
+ "application/vnd.jupyter.widget-view+json": {
695
+ "model_id": "5602f4e1aeeb4fe188959d11ba20f264",
696
+ "version_major": 2,
697
+ "version_minor": 0
698
+ },
699
+ "text/plain": [
700
+ "special_tokens_map.json: 0%| | 0.00/607 [00:00<?, ?B/s]"
701
+ ]
702
+ },
703
+ "metadata": {},
704
+ "output_type": "display_data"
705
+ },
706
+ {
707
+ "data": {
708
+ "application/vnd.jupyter.widget-view+json": {
709
+ "model_id": "4b838f37f02942489cf46c4dc88996bc",
710
+ "version_major": 2,
711
+ "version_minor": 0
712
+ },
713
+ "text/plain": [
714
+ "config.json: 0%| | 0.00/1.03k [00:00<?, ?B/s]"
715
+ ]
716
+ },
717
+ "metadata": {},
718
+ "output_type": "display_data"
719
+ },
720
+ {
721
+ "data": {
722
+ "application/vnd.jupyter.widget-view+json": {
723
+ "model_id": "411374384e794858bfc38196d79519a3",
724
+ "version_major": 2,
725
+ "version_minor": 0
726
+ },
727
+ "text/plain": [
728
+ "model.safetensors.index.json: 0%| | 0.00/62.6k [00:00<?, ?B/s]"
729
+ ]
730
+ },
731
+ "metadata": {},
732
+ "output_type": "display_data"
733
+ },
734
+ {
735
+ "data": {
736
+ "application/vnd.jupyter.widget-view+json": {
737
+ "model_id": "ba2f2463c6e04dc18456af484f3a8d08",
738
+ "version_major": 2,
739
+ "version_minor": 0
740
+ },
741
+ "text/plain": [
742
+ "Downloading shards: 0%| | 0/3 [00:00<?, ?it/s]"
743
+ ]
744
+ },
745
+ "metadata": {},
746
+ "output_type": "display_data"
747
+ },
748
+ {
749
+ "data": {
750
+ "application/vnd.jupyter.widget-view+json": {
751
+ "model_id": "dd91ff2bf11c4eeaba6f1621c052e7c5",
752
+ "version_major": 2,
753
+ "version_minor": 0
754
+ },
755
+ "text/plain": [
756
+ "model-00001-of-00003.safetensors: 0%| | 0.00/4.95G [00:00<?, ?B/s]"
757
+ ]
758
+ },
759
+ "metadata": {},
760
+ "output_type": "display_data"
761
+ },
762
+ {
763
+ "data": {
764
+ "application/vnd.jupyter.widget-view+json": {
765
+ "model_id": "51e62a7d32204ef7aa42070d3e071a31",
766
+ "version_major": 2,
767
+ "version_minor": 0
768
+ },
769
+ "text/plain": [
770
+ "model-00002-of-00003.safetensors: 0%| | 0.00/5.00G [00:00<?, ?B/s]"
771
+ ]
772
+ },
773
+ "metadata": {},
774
+ "output_type": "display_data"
775
+ },
776
+ {
777
+ "data": {
778
+ "application/vnd.jupyter.widget-view+json": {
779
+ "model_id": "2fe83bfe2c6b4f139b901499844219c3",
780
+ "version_major": 2,
781
+ "version_minor": 0
782
+ },
783
+ "text/plain": [
784
+ "model-00003-of-00003.safetensors: 0%| | 0.00/1.74G [00:00<?, ?B/s]"
785
+ ]
786
+ },
787
+ "metadata": {},
788
+ "output_type": "display_data"
789
+ },
790
+ {
791
+ "name": "stderr",
792
+ "output_type": "stream",
793
+ "text": [
794
+ "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
795
+ "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
796
+ "`config.hidden_activation` if you want to override this behaviour.\n",
797
+ "See https://github.com/huggingface/transformers/pull/29402 for more details.\n"
798
+ ]
799
+ },
800
+ {
801
+ "data": {
802
+ "application/vnd.jupyter.widget-view+json": {
803
+ "model_id": "c3635aa05f51448e8527bd174a7afce0",
804
+ "version_major": 2,
805
+ "version_minor": 0
806
+ },
807
+ "text/plain": [
808
+ "Loading checkpoint shards: 0%| | 0/3 [00:00<?, ?it/s]"
809
+ ]
810
+ },
811
+ "metadata": {},
812
+ "output_type": "display_data"
813
+ },
814
+ {
815
+ "data": {
816
+ "application/vnd.jupyter.widget-view+json": {
817
+ "model_id": "b10a25ba4b05417b8faeee904aa911af",
818
+ "version_major": 2,
819
+ "version_minor": 0
820
+ },
821
+ "text/plain": [
822
+ "generation_config.json: 0%| | 0.00/137 [00:00<?, ?B/s]"
823
+ ]
824
+ },
825
+ "metadata": {},
826
+ "output_type": "display_data"
827
+ },
828
+ {
829
+ "name": "stderr",
830
+ "output_type": "stream",
831
+ "text": [
832
+ "/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1474: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
833
+ " warnings.warn(\n",
834
+ "Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
835
+ ]
836
+ },
837
+ {
838
+ "name": "stdout",
839
+ "output_type": "stream",
840
+ "text": [
841
+ "trainable params: 11,298,816 || all params: 2,934,765,296 || trainable%: 0.3850\n"
842
+ ]
843
+ },
844
+ {
845
+ "name": "stderr",
846
+ "output_type": "stream",
847
+ "text": [
848
+ "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:588: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
849
+ " warnings.warn(\n",
850
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n",
851
+ " warnings.warn(\n",
852
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:61: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
853
+ " warnings.warn(\n",
854
+ "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.\n"
855
+ ]
856
+ },
857
+ {
858
+ "data": {
859
+ "text/html": [
860
+ "\n",
861
+ " <div>\n",
862
+ " \n",
863
+ " <progress value='130' max='258' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
864
+ " [130/258 28:50 < 28:50, 0.07 it/s, Epoch 1.00/2]\n",
865
+ " </div>\n",
866
+ " <table border=\"1\" class=\"dataframe\">\n",
867
+ " <thead>\n",
868
+ " <tr style=\"text-align: left;\">\n",
869
+ " <th>Epoch</th>\n",
870
+ " <th>Training Loss</th>\n",
871
+ " <th>Validation Loss</th>\n",
872
+ " </tr>\n",
873
+ " </thead>\n",
874
+ " <tbody>\n",
875
+ " </tbody>\n",
876
+ "</table><p>\n",
877
+ " <div>\n",
878
+ " \n",
879
+ " <progress value='117' max='130' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
880
+ " [117/130 01:30 < 00:10, 1.27 it/s]\n",
881
+ " </div>\n",
882
+ " "
883
+ ],
884
+ "text/plain": [
885
+ "<IPython.core.display.HTML object>"
886
+ ]
887
+ },
888
+ "metadata": {},
889
+ "output_type": "display_data"
890
+ }
891
+ ],
892
+ "source": [
893
+ "# import torch\n",
894
+ "\n",
895
+ "# from PIL import Image\n",
896
+ "# import requests\n",
897
+ "# from io import BytesIO\n",
898
+ "# import base64\n",
899
+ "\n",
900
+ "# from AllImagesb64 import TestBase64ECG\n",
901
+ "\n",
902
+ "# #HuggingFace Imports\n",
903
+ "# from transformers import AutoTokenizer, PaliGemmaForConditionalGeneration, PaliGemmaProcessor,AutoProcessor,BitsAndBytesConfig,TrainingArguments,Trainer\n",
904
+ "# from datasets import load_dataset\n",
905
+ "# from peft import get_peft_model,LoraConfig #Parameter Efficient Fine Tuning Library\n",
906
+ "\n",
907
+ "\n",
908
+ "# import subprocess\n",
909
+ "# import kagglehub\n",
910
+ "# import os\n",
911
+ "\n",
912
+ "\n",
913
+ "\n",
914
+ "# InputTxt = \"Is there any abnormality with this ecg ?\"\n",
915
+ "# InputImg = TestBase64ECG.replace(\"data:image/jpeg;base64,\",\"\")\n",
916
+ "#\n",
917
+ "#\n",
918
+ "# InputImgTensor = Image.open(BytesIO(base64.b64decode(InputImg)))\n",
919
+ "#\n",
920
+ "# print(\"Img to Process\",InputImgTensor)\n",
921
+ "\n",
922
+ "\n",
923
+ "#====================================Hugging Face Transformers Vanilla PaliGemma Model====================================\n",
924
+ "\n",
925
+ "# device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
926
+ "#\n",
927
+ "# ModelID = \"google/paligemma-3b-mix-224\"\n",
928
+ "#\n",
929
+ "# Model = PaliGemmaForConditionalGeneration.from_pretrained(ModelID,torch_dtype=torch.bfloat16)\n",
930
+ "# Processor = PaliGemmaProcessor.from_pretrained(ModelID)\n",
931
+ "#\n",
932
+ "# InputTokens = Processor(text=InputTxt,images=InputImgTensor,padding=\"longest\",do_convert_rgb=True,return_tensors=\"pt\").to(\"cuda\")\n",
933
+ "# Model.to(device)\n",
934
+ "#\n",
935
+ "# InputTokens = InputTokens.to(dtype=Model.dtype)\n",
936
+ "#\n",
937
+ "# with torch.no_grad():\n",
938
+ "# output = Model.generate(**InputTokens,max_length=496)\n",
939
+ "#\n",
940
+ "# #All 257152 is padding\n",
941
+ "# print(output)\n",
942
+ "# #Decode takes the Vector and maps Its Components back From Token ID to Token Word\n",
943
+ "# print(Processor.decode(output[0],skip_special_tokens=True))\n",
944
+ "\n",
945
+ "\n",
946
+ "\n",
947
+ "#======================Load the Model Using 4 Bit Quantization If Limited RAM============================\n",
948
+ "#Weights Turned from Float32 to Normal Float4\n",
949
+ "\n",
950
+ "# bnbConfig = BitsAndBytesConfig(\n",
951
+ "# load_in_4bit=True,\n",
952
+ "# bnb_4bit_quant_type=\"nf4\", #normal float4\n",
953
+ "# bnb_4bit_compute_dtype=torch.bfloat16 #Original Model Float\n",
954
+ "#\n",
955
+ "# )\n",
956
+ "#\n",
957
+ "#\n",
958
+ "#\n",
959
+ "# Model = PaliGemmaForConditionalGeneration.from_pretrained(\n",
960
+ "# ModelID,\n",
961
+ "# quantization_config = bnbConfig,\n",
962
+ "# device_map={\"\":0}\n",
963
+ "# )\n",
964
+ "#\n",
965
+ "# Processor = PaliGemmaProcessor.from_pretrained(ModelID)\n",
966
+ "#\n",
967
+ "# InputTokens = Processor(text=InputTxt,images=InputImgTensor,padding=\"longest\",do_convert_rgb=True,return_tensors=\"pt\").to(\"cuda\")\n",
968
+ "# Model.to(device)\n",
969
+ "#\n",
970
+ "# InputTokens = InputTokens.to(dtype=Model.dtype)\n",
971
+ "#\n",
972
+ "# with torch.no_grad():\n",
973
+ "# output = Model.generate(**InputTokens,max_length=496)\n",
974
+ "#\n",
975
+ "# #All 257152 is padding\n",
976
+ "# print(output)\n",
977
+ "# #Decode takes the Vector and maps Its Components back From Token ID to Token Word\n",
978
+ "# print(Processor.decode(output[0],skip_special_tokens=True))\n",
979
+ "\n",
980
+ "\n",
981
+ "import torch\n",
982
+ "import numpy as np\n",
983
+ "from PIL import Image\n",
984
+ "import requests\n",
985
+ "from io import BytesIO\n",
986
+ "import base64\n",
987
+ "\n",
988
+ "#HuggingFace Imports\n",
989
+ "import transformers\n",
990
+ "from transformers import AutoTokenizer, PaliGemmaForConditionalGeneration, PaliGemmaProcessor,AutoProcessor,BitsAndBytesConfig,TrainingArguments,Trainer\n",
991
+ "from datasets import load_dataset\n",
992
+ "from peft import get_peft_model,LoraConfig,prepare_model_for_kbit_training #Parameter Efficient Fine Tuning Library\n",
993
+ "\n",
994
+ "\n",
995
+ "import subprocess\n",
996
+ "import kagglehub\n",
997
+ "import os\n",
998
+ "from torchvision.transforms import ToTensor\n",
999
+ "from torchvision.transforms.functional import to_pil_image\n",
1000
+ "\n",
1001
+ "#================================Fine Tuning With LoRA/QLoRA using Hugging Face Dataset ====================================\n",
1002
+ "\n",
1003
+ "print(\"Current Directory:\", os.getcwd())\n",
1004
+ "#=================1) Dataset Processing==============================================\n",
1005
+ "FullDataset = load_dataset(\"Geohunterr/ECGTVision\",trust_remote_code=True)\n",
1006
+ "\n",
1007
+ "#Remove Some Columns from the Dataset That we won't need\n",
1008
+ "ColsToRemove = [\"question_type\",\"answers\",\"answer_type\",\"question_id\"]\n",
1009
+ "\n",
1010
+ "FullDataset = FullDataset.remove_columns(ColsToRemove)\n",
1011
+ "\n",
1012
+ "#Split Dataset into Training and Testing Segments\n",
1013
+ "TraningDataset= FullDataset[\"train\"]\n",
1014
+ "TestingDataset = FullDataset[\"test\"]\n",
1015
+ "# TestImg = TraningDataset[0][\"image\"]\n",
1016
+ "# TensorImg =ToTensor()(np.array(TestImg)) #The first () constructs the ToTensor instance; the second () calls that instance like a function because the class defines __call__\n",
1017
+ "# print(TensorImg)\n",
1018
+ "\n",
1019
+ "\n",
1020
+ "#==================2) Declare the Model ==========================\n",
1021
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1022
+ "ModelID = \"google/paligemma-3b-pt-224\" #Use the pt (pre-trained) checkpoint instead of mix (pre-trained + fine-tuned)\n",
1023
+ "ModelProcessor = PaliGemmaProcessor.from_pretrained(ModelID)\n",
1024
+ "\n",
1025
+ "#=================3) Declare Tokenizer for Tokenizing Dataset=================================\n",
1026
+ "\n",
1027
+ "TokenToIDFn = ModelProcessor.tokenizer.convert_tokens_to_ids(\"<image>\")\n",
1028
+ "\n",
1029
+ "def TokenGeneratorFn(DatasetEntries):\n",
1030
+ " try:\n",
1031
+ " TextArr = [\"answer \"+i[\"question\"] + \"\\n\" + i[\"multiple_choice_answer\"] for i in DatasetEntries]\n",
1032
+ " ImgsArr = [i[\"image\"].convert(\"RGB\") for i in DatasetEntries]\n",
1033
+ " InputTokens = ModelProcessor(text=TextArr,images=ImgsArr,return_tensors=\"pt\",padding=\"longest\",tokenize_newline_separately=False)\n",
1034
+ " Labels = InputTokens[\"input_ids\"].clone()\n",
1035
+ "\n",
1036
+ " Labels[Labels == ModelProcessor.tokenizer.pad_token_id] = -100\n",
1037
+ " Labels[Labels == TokenToIDFn] = -100\n",
1038
+ "\n",
1039
+ " #The two lines above are equivalent to the commented-out loop below, but faster because they use vectorized boolean-mask operations on the tensor\n",
1040
+ " # for i in range(len(Labels)):\n",
1041
+ " # if(Labels[i] == ModelProcessor.tokenizer.pad_token_id):\n",
1042
+ " # Labels[i] = -100\n",
1043
+ " # elif(Labels[i] == TokenToIDFn):\n",
1044
+ " # Labels[i] = -100\n",
1045
+ "\n",
1046
+ " InputTokens[\"labels\"] = Labels # This is V.Imp you have to use labels with a small \"l\" because the model expects labels to be written this way and not as Labels\n",
1047
+ " InputTokens = InputTokens.to(torch.bfloat16).to(device)\n",
1048
+ " return InputTokens\n",
1049
+ "\n",
1050
+ " except Exception as err:\n",
1051
+ " print(\"Error:\",err)\n",
1052
+ "\n",
1053
+ "\n",
1054
+ "#=============================4) Initialize The Fine Tuning --> LoRA Config + Model=============================\n",
1055
+ "FineTuningLoraConfig = LoraConfig(\n",
1056
+ " r=8,\n",
1057
+ " lora_alpha=32,\n",
1058
+ " lora_dropout=0.05,\n",
1059
+ " bias=\"none\",\n",
1060
+ " task_type=\"CAUSAL_LM\",\n",
1061
+ " target_modules=[\"q_proj\",\"v_proj\",\"k_proj\",\"o_proj\",\"gate_proj\",\"up_proj\",\"down_proj\"]\n",
1062
+ "\n",
1063
+ ")\n",
1064
+ "\n",
1065
+ "\n",
1066
+ "\n",
1067
+ "bnbConfig = BitsAndBytesConfig(\n",
1068
+ " load_in_4bit=True,\n",
1069
+ " bnb_4bit_quant_type=\"nf4\", #normal float4\n",
1070
+ " bnb_4bit_compute_dtype=torch.bfloat16 #Original Model Float\n",
1071
+ "\n",
1072
+ ")\n",
1073
+ "\n",
1074
+ "ModelToFineTune = PaliGemmaForConditionalGeneration.from_pretrained(\n",
1075
+ " ModelID,\n",
1076
+ " quantization_config = bnbConfig,\n",
1077
+ " device_map={\"\":0}\n",
1078
+ ")\n",
1079
+ "\n",
1080
+ "\n",
1081
+ "ModelToFineTune = prepare_model_for_kbit_training(ModelToFineTune) #Important: prepares the quantized model for training (freezes the base weights, upcasts layer norms to float32 and enables gradient checkpointing)\n",
1082
+ "ModelLoraFineTune = get_peft_model(ModelToFineTune,FineTuningLoraConfig)\n",
1083
+ "ModelLoraFineTune.print_trainable_parameters()\n",
1084
+ "\n",
1085
+ "#=====================5) Complete The LoraConfig by adding Training Arguments========================\n",
1086
+ "\n",
1087
+ "TrainingArgs = TrainingArguments(\n",
1088
+ " output_dir=\"/workspace\",\n",
1089
+ " overwrite_output_dir=False,\n",
1090
+ " save_strategy=\"epoch\",\n",
1091
+ " evaluation_strategy=\"epoch\",\n",
1092
+ " run_name=\"ECGFineTunedPali\",\n",
1093
+ " do_train=True,\n",
1094
+ " # do_eval=True,\n",
1095
+ "\n",
1096
+ " logging_dir=\"/workspace/Logs\",\n",
1097
+ " logging_steps=100,\n",
1098
+ " num_train_epochs=2,\n",
1099
+ " per_device_train_batch_size=16,\n",
1100
+ " # per_device_eval_batch_size=16,\n",
1101
+ " gradient_accumulation_steps=4,\n",
1102
+ " warmup_steps=2,\n",
1103
+ " learning_rate=2e-5,\n",
1104
+ " weight_decay=1e-6,\n",
1105
+ " adam_beta2=0.999,\n",
1106
+ " optim=\"adamw_hf\",\n",
1107
+ "\n",
1108
+ " # save_strategy=\"steps\",\n",
1109
+ " # save_steps=200,\n",
1110
+ " push_to_hub=True,\n",
1111
+ " save_total_limit=1,\n",
1112
+ " bf16=True,\n",
1113
+ " report_to=[\"tensorboard\"],\n",
1114
+ " remove_unused_columns=False,\n",
1115
+ " dataloader_pin_memory=False\n",
1116
+ "\n",
1117
+ ")\n",
1118
+ "\n",
1119
+ "FullTrainer = Trainer(\n",
1120
+ " model=ModelLoraFineTune,\n",
1121
+ " args=TrainingArgs,\n",
1122
+ " train_dataset=TraningDataset,\n",
1123
+ " eval_dataset=TestingDataset,\n",
1124
+ " data_collator=TokenGeneratorFn,\n",
1125
+ "\n",
1126
+ ")\n",
1127
+ "\n",
1128
+ "FullTrainer.train()\n",
1129
+ "\n",
1130
+ "# NewModel = \"ECGFTPaliGemma\"\n",
1131
+ "FullTrainer.save_model(\"/workspace\")\n",
1132
+ "#FullTrainer.save_model(NewModel)\n",
1133
+ "\n",
1134
+ "\n",
1135
+ "##===================== After Training Merge The LoRA Weights With the Original Model Weights========================\n",
1136
+ "#\n",
1137
+ "# #Reload The Model and The Tokenizer\n",
1138
+ "#\n",
1139
+ "# BaseTokenizer = AutoTokenizer.from_pretrained(ModelID)\n",
1140
+ "# BaseModel = AutoModelForCausalLM.from_pretrained(\n",
1141
+ "# ModelID,\n",
1142
+ "# quantization_config = bnbConfig,\n",
1143
+ "# device_map=\"auto\",\n",
1144
+ "# attn_implementation=AttnAlgorithm\n",
1145
+ "# )\n",
1146
+ "#\n",
1147
+ "#\n",
1148
+ "# BaseModel,BaseTokenizer = setup_chat_format(BaseModel,Tokenizer)\n",
1149
+ "#\n",
1150
+ "# #Merge the New LoRA Model with BaseModel\n",
1151
+ "# MergedModel = PeftModel.from_pretrained(BaseModel,NewModel)\n",
1152
+ "# FinalMergedModel = MergedModel.merge_and_unload()\n",
1153
+ "#\n",
1154
+ "# #Push To Hugging Face Repo\n",
1155
+ "# FinalMergedModel.push_to_hub(NewModel,use_temp_dir=False)\n",
1156
+ "# BaseTokenizer.push_to_hub(NewModel,use_temp_dir=False)\n",
1157
+ "\n",
1158
+ "\n",
1159
+ "#================================Fine Tuning PaliGemma====================================\n",
1160
+ "#\n",
1161
+ "# def RunPwrShellCmd(Command:str):\n",
1162
+ "#\n",
1163
+ "# try:\n",
1164
+ "#\n",
1165
+ "# TerminalCmd = subprocess.Popen([\"powershell.exe\", Command], shell=True , stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
1166
+ "# stdout, stderr = TerminalCmd.communicate()\n",
1167
+ "# print('Output:', stdout)\n",
1168
+ "# # print('Error:', stderr)\n",
1169
+ "# # print('Return Code:', TerminalCmd.returncode)\n",
1170
+ "#\n",
1171
+ "# except subprocess.CalledProcessError as e:\n",
1172
+ "# print(f'Command failed with error: {e.stderr}')\n",
1173
+ "#\n",
1174
+ "#\n",
1175
+ "#\n",
1176
+ "# def DownloadModelContent():\n",
1177
+ "#\n",
1178
+ "# #Download Paligemma from Kaggle\n",
1179
+ "# ModelPath = \"./paligemma-3b-pt-224.f16.npz\"\n",
1180
+ "# if not os.path.exists(ModelPath):\n",
1181
+ "# print(\"Downloading the checkpoint from Kaggle, this could take a few minutes....\")\n",
1182
+ "#\n",
1183
+ "# # Note: kaggle archive contains the same checkpoint in multiple formats.\n",
1184
+ "# # Download only the float16 model.\n",
1185
+ "# ModelPath = kagglehub.model_download('google/paligemma/jax/paligemma-3b-pt-224', ModelPath)\n",
1186
+ "# print(f\"Model path: {ModelPath}\")\n",
1187
+ "#\n",
1188
+ "#\n",
1189
+ "# TokenizerPath = \"./paligemma_tokenizer.model\"\n",
1190
+ "# if not os.path.exists(TokenizerPath):\n",
1191
+ "# print(\"hello\")"
1192
+ ]
1193
+ },
1194
+ {
1195
+ "cell_type": "code",
1196
+ "execution_count": null,
1197
+ "id": "480fc6f5-c0d7-4730-a81b-d661779c96e2",
1198
+ "metadata": {},
1199
+ "outputs": [],
1200
+ "source": []
1201
+ }
1202
+ ],
1203
+ "metadata": {
1204
+ "kernelspec": {
1205
+ "display_name": "Python 3 (ipykernel)",
1206
+ "language": "python",
1207
+ "name": "python3"
1208
+ },
1209
+ "language_info": {
1210
+ "codemirror_mode": {
1211
+ "name": "ipython",
1212
+ "version": 3
1213
+ },
1214
+ "file_extension": ".py",
1215
+ "mimetype": "text/x-python",
1216
+ "name": "python",
1217
+ "nbconvert_exporter": "python",
1218
+ "pygments_lexer": "ipython3",
1219
+ "version": "3.10.12"
1220
+ }
1221
+ },
1222
+ "nbformat": 4,
1223
+ "nbformat_minor": 5
1224
+ }
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "google/paligemma-3b-pt-224",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "k_proj",
24
+ "q_proj",
25
+ "down_proj",
26
+ "o_proj",
27
+ "up_proj",
28
+ "gate_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b05fce35a49cac791224fdcaac81a84989be4422c06c3abfd0b81226071f51e5
3
+ size 45258384
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3665faa5d60313520da2387beb2f09ab5646e885aab78edeed1f38876efb70f
3
+ size 5048