forestav committed on
Commit
225a75f
·
1 Parent(s): 4e4a87c

training pipeline version control bug fix

Browse files
Files changed (1) hide show
  1. training_pipeline.ipynb +64 -32
training_pipeline.ipynb CHANGED
@@ -2,9 +2,17 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 23,
6
  "metadata": {},
7
- "outputs": [],
 
 
 
 
 
 
 
 
8
  "source": [
9
  "import hopsworks\n",
10
  "from sentence_transformers import SentenceTransformer, InputExample, losses\n",
@@ -16,18 +24,18 @@
16
  },
17
  {
18
  "cell_type": "code",
19
- "execution_count": 24,
20
  "metadata": {},
21
  "outputs": [
22
  {
23
  "name": "stdout",
24
  "output_type": "stream",
25
  "text": [
26
- "2025-01-08 19:52:22,417 INFO: Closing external client and cleaning up certificates.\n",
27
- "Connection closed.\n",
28
- "2025-01-08 19:52:22,421 INFO: Initializing external client\n",
29
- "2025-01-08 19:52:22,421 INFO: Base URL: https://c.app.hopsworks.ai:443\n",
30
- "2025-01-08 19:52:23,548 INFO: Python Engine initialized.\n",
31
  "\n",
32
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158296\n"
33
  ]
@@ -51,7 +59,7 @@
51
  "name": "stdout",
52
  "output_type": "stream",
53
  "text": [
54
- "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.84s) \n"
55
  ]
56
  }
57
  ],
@@ -109,8 +117,8 @@
109
  "name": "stdout",
110
  "output_type": "stream",
111
  "text": [
112
- "2025-01-08 19:25:05,476 INFO: Use pytorch device_name: cpu\n",
113
- "2025-01-08 19:25:05,477 INFO: Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2\n"
114
  ]
115
  }
116
  ],
@@ -159,12 +167,12 @@
159
  {
160
  "data": {
161
  "application/vnd.jupyter.widget-view+json": {
162
- "model_id": "65a11878fdad456a94ae2e4d44e403a3",
163
  "version_major": 2,
164
  "version_minor": 0
165
  },
166
  "text/plain": [
167
- " 0%| | 0/3 [00:00<?, ?it/s]"
168
  ]
169
  },
170
  "metadata": {},
@@ -174,28 +182,14 @@
174
  "name": "stdout",
175
  "output_type": "stream",
176
  "text": [
177
- "{'train_runtime': 5.2094, 'train_samples_per_second': 2.879, 'train_steps_per_second': 0.576, 'train_loss': 0.27454523245493573, 'epoch': 3.0}\n",
178
- "2025-01-08 19:25:14,162 INFO: Save model to ./finetuned_model\n"
179
  ]
180
  },
181
  {
182
  "data": {
183
  "application/vnd.jupyter.widget-view+json": {
184
- "model_id": "7bc7a5e2e56e4abe8bbf47e5ed251d6a",
185
- "version_major": 2,
186
- "version_minor": 0
187
- },
188
- "text/plain": [
189
- "Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]"
190
- ]
191
- },
192
- "metadata": {},
193
- "output_type": "display_data"
194
- },
195
- {
196
- "data": {
197
- "application/vnd.jupyter.widget-view+json": {
198
- "model_id": "a4218c62846f43c7be217513f8fd86de",
199
  "version_major": 2,
200
  "version_minor": 0
201
  },
@@ -265,12 +259,50 @@
265
  "cell_type": "code",
266
  "execution_count": 15,
267
  "metadata": {},
268
- "outputs": [],
 
 
 
 
 
 
 
 
 
269
  "source": [
270
  "# Get Model Registry\n",
271
  "mr = project.get_model_registry()"
272
  ]
273
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  {
275
  "cell_type": "code",
276
  "execution_count": 19,
@@ -284,7 +316,7 @@
284
  " model_schema=model_schema,\n",
285
  " input_example=X_train_sample,\n",
286
  " description=\"Finetuned SentenceTransformer for job matching\",\n",
287
- " version=1\n",
288
  ")"
289
  ]
290
  },
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "2025-01-12 10:07:12,956 INFO: PyTorch version 2.5.1 available.\n"
13
+ ]
14
+ }
15
+ ],
16
  "source": [
17
  "import hopsworks\n",
18
  "from sentence_transformers import SentenceTransformer, InputExample, losses\n",
 
24
  },
25
  {
26
  "cell_type": "code",
27
+ "execution_count": 2,
28
  "metadata": {},
29
  "outputs": [
30
  {
31
  "name": "stdout",
32
  "output_type": "stream",
33
  "text": [
34
+ "2025-01-12 10:07:14,851 INFO: Initializing external client\n",
35
+ "2025-01-12 10:07:14,852 INFO: Base URL: https://c.app.hopsworks.ai:443\n",
36
+ "2025-01-12 10:07:15,245 WARNING: InsecureRequestWarning: Unverified HTTPS request is being made to host 'c.app.hopsworks.ai'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
37
+ "\n",
38
+ "2025-01-12 10:07:18,039 INFO: Python Engine initialized.\n",
39
  "\n",
40
  "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158296\n"
41
  ]
 
59
  "name": "stdout",
60
  "output_type": "stream",
61
  "text": [
62
+ "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.46s) \n"
63
  ]
64
  }
65
  ],
 
117
  "name": "stdout",
118
  "output_type": "stream",
119
  "text": [
120
+ "2025-01-12 10:07:23,794 INFO: Use pytorch device_name: cpu\n",
121
+ "2025-01-12 10:07:23,795 INFO: Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2\n"
122
  ]
123
  }
124
  ],
 
167
  {
168
  "data": {
169
  "application/vnd.jupyter.widget-view+json": {
170
+ "model_id": "13a4c4779de349a4a93c26a2a952d713",
171
  "version_major": 2,
172
  "version_minor": 0
173
  },
174
  "text/plain": [
175
+ " 0%| | 0/6 [00:00<?, ?it/s]"
176
  ]
177
  },
178
  "metadata": {},
 
182
  "name": "stdout",
183
  "output_type": "stream",
184
  "text": [
185
+ "{'train_runtime': 16.1772, 'train_samples_per_second': 4.265, 'train_steps_per_second': 0.371, 'train_loss': 0.18365144729614258, 'epoch': 3.0}\n",
186
+ "2025-01-12 10:07:44,670 INFO: Save model to ./finetuned_model\n"
187
  ]
188
  },
189
  {
190
  "data": {
191
  "application/vnd.jupyter.widget-view+json": {
192
+ "model_id": "a36ca79a9a5245c3931717a3c466bba9",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  "version_major": 2,
194
  "version_minor": 0
195
  },
 
259
  "cell_type": "code",
260
  "execution_count": 15,
261
  "metadata": {},
262
+ "outputs": [
263
+ {
264
+ "name": "stdout",
265
+ "output_type": "stream",
266
+ "text": [
267
+ "2025-01-12 10:07:45,746 WARNING: InsecureRequestWarning: Unverified HTTPS request is being made to host 'c.app.hopsworks.ai'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
268
+ "\n"
269
+ ]
270
+ }
271
+ ],
272
  "source": [
273
  "# Get Model Registry\n",
274
  "mr = project.get_model_registry()"
275
  ]
276
  },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": 17,
280
+ "metadata": {},
281
+ "outputs": [
282
+ {
283
+ "name": "stdout",
284
+ "output_type": "stream",
285
+ "text": [
286
+ "2025-01-12 10:08:24,657 WARNING: VersionWarning: No version provided for getting model `job_matching_sentence_transformer`, defaulting to `1`.\n",
287
+ "\n",
288
+ "Model already exists with version 1\n"
289
+ ]
290
+ }
291
+ ],
292
+ "source": [
293
+ "# get_model() without a version defaults to version 1, so list every registered version instead\n",
294
+ "try:\n",
295
+ "    existing_models = mr.get_models(name=\"job_matching_sentence_transformer\") or []\n",
296
+ "except Exception:\n",
297
+ "    # Lookup failed (presumably the model is not registered yet): start from version 0\n",
298
+ "    existing_models = []\n",
299
+ "latest_version = max((m.version for m in existing_models), default=0)\n",
300
+ "if latest_version:\n",
301
+ "    print(f\"Model already exists with version {latest_version}\")\n",
302
+ "# Register under the next free version number\n",
303
+ "new_version = latest_version + 1"
304
+ ]
305
+ },
306
  {
307
  "cell_type": "code",
308
  "execution_count": 19,
 
316
  " model_schema=model_schema,\n",
317
  " input_example=X_train_sample,\n",
318
  " description=\"Finetuned SentenceTransformer for job matching\",\n",
319
+ " version=new_version,\n",
320
  ")"
321
  ]
322
  },