Spaces:
Running
Running
training pipeline version control bug fix
Browse files- training_pipeline.ipynb +64 -32
training_pipeline.ipynb
CHANGED
@@ -2,9 +2,17 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
"source": [
|
9 |
"import hopsworks\n",
|
10 |
"from sentence_transformers import SentenceTransformer, InputExample, losses\n",
|
@@ -16,18 +24,18 @@
|
|
16 |
},
|
17 |
{
|
18 |
"cell_type": "code",
|
19 |
-
"execution_count":
|
20 |
"metadata": {},
|
21 |
"outputs": [
|
22 |
{
|
23 |
"name": "stdout",
|
24 |
"output_type": "stream",
|
25 |
"text": [
|
26 |
-
"2025-01-
|
27 |
-
"
|
28 |
-
"2025-01-
|
29 |
-
"
|
30 |
-
"2025-01-
|
31 |
"\n",
|
32 |
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158296\n"
|
33 |
]
|
@@ -51,7 +59,7 @@
|
|
51 |
"name": "stdout",
|
52 |
"output_type": "stream",
|
53 |
"text": [
|
54 |
-
"Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (
|
55 |
]
|
56 |
}
|
57 |
],
|
@@ -109,8 +117,8 @@
|
|
109 |
"name": "stdout",
|
110 |
"output_type": "stream",
|
111 |
"text": [
|
112 |
-
"2025-01-
|
113 |
-
"2025-01-
|
114 |
]
|
115 |
}
|
116 |
],
|
@@ -159,12 +167,12 @@
|
|
159 |
{
|
160 |
"data": {
|
161 |
"application/vnd.jupyter.widget-view+json": {
|
162 |
-
"model_id": "
|
163 |
"version_major": 2,
|
164 |
"version_minor": 0
|
165 |
},
|
166 |
"text/plain": [
|
167 |
-
" 0%| | 0/
|
168 |
]
|
169 |
},
|
170 |
"metadata": {},
|
@@ -174,28 +182,14 @@
|
|
174 |
"name": "stdout",
|
175 |
"output_type": "stream",
|
176 |
"text": [
|
177 |
-
"{'train_runtime':
|
178 |
-
"2025-01-
|
179 |
]
|
180 |
},
|
181 |
{
|
182 |
"data": {
|
183 |
"application/vnd.jupyter.widget-view+json": {
|
184 |
-
"model_id": "
|
185 |
-
"version_major": 2,
|
186 |
-
"version_minor": 0
|
187 |
-
},
|
188 |
-
"text/plain": [
|
189 |
-
"Computing widget examples: 0%| | 0/1 [00:00<?, ?example/s]"
|
190 |
-
]
|
191 |
-
},
|
192 |
-
"metadata": {},
|
193 |
-
"output_type": "display_data"
|
194 |
-
},
|
195 |
-
{
|
196 |
-
"data": {
|
197 |
-
"application/vnd.jupyter.widget-view+json": {
|
198 |
-
"model_id": "a4218c62846f43c7be217513f8fd86de",
|
199 |
"version_major": 2,
|
200 |
"version_minor": 0
|
201 |
},
|
@@ -265,12 +259,50 @@
|
|
265 |
"cell_type": "code",
|
266 |
"execution_count": 15,
|
267 |
"metadata": {},
|
268 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
"source": [
|
270 |
"# Get Model Registry\n",
|
271 |
"mr = project.get_model_registry()"
|
272 |
]
|
273 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
{
|
275 |
"cell_type": "code",
|
276 |
"execution_count": 19,
|
@@ -284,7 +316,7 @@
|
|
284 |
" model_schema=model_schema,\n",
|
285 |
" input_example=X_train_sample,\n",
|
286 |
" description=\"Finetuned SentenceTransformer for job matching\",\n",
|
287 |
-
" version=
|
288 |
")"
|
289 |
]
|
290 |
},
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"2025-01-12 10:07:12,956 INFO: PyTorch version 2.5.1 available.\n"
|
13 |
+
]
|
14 |
+
}
|
15 |
+
],
|
16 |
"source": [
|
17 |
"import hopsworks\n",
|
18 |
"from sentence_transformers import SentenceTransformer, InputExample, losses\n",
|
|
|
24 |
},
|
25 |
{
|
26 |
"cell_type": "code",
|
27 |
+
"execution_count": 2,
|
28 |
"metadata": {},
|
29 |
"outputs": [
|
30 |
{
|
31 |
"name": "stdout",
|
32 |
"output_type": "stream",
|
33 |
"text": [
|
34 |
+
"2025-01-12 10:07:14,851 INFO: Initializing external client\n",
|
35 |
+
"2025-01-12 10:07:14,852 INFO: Base URL: https://c.app.hopsworks.ai:443\n",
|
36 |
+
"2025-01-12 10:07:15,245 WARNING: InsecureRequestWarning: Unverified HTTPS request is being made to host 'c.app.hopsworks.ai'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
37 |
+
"\n",
|
38 |
+
"2025-01-12 10:07:18,039 INFO: Python Engine initialized.\n",
|
39 |
"\n",
|
40 |
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1158296\n"
|
41 |
]
|
|
|
59 |
"name": "stdout",
|
60 |
"output_type": "stream",
|
61 |
"text": [
|
62 |
+
"Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.46s) \n"
|
63 |
]
|
64 |
}
|
65 |
],
|
|
|
117 |
"name": "stdout",
|
118 |
"output_type": "stream",
|
119 |
"text": [
|
120 |
+
"2025-01-12 10:07:23,794 INFO: Use pytorch device_name: cpu\n",
|
121 |
+
"2025-01-12 10:07:23,795 INFO: Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2\n"
|
122 |
]
|
123 |
}
|
124 |
],
|
|
|
167 |
{
|
168 |
"data": {
|
169 |
"application/vnd.jupyter.widget-view+json": {
|
170 |
+
"model_id": "13a4c4779de349a4a93c26a2a952d713",
|
171 |
"version_major": 2,
|
172 |
"version_minor": 0
|
173 |
},
|
174 |
"text/plain": [
|
175 |
+
" 0%| | 0/6 [00:00<?, ?it/s]"
|
176 |
]
|
177 |
},
|
178 |
"metadata": {},
|
|
|
182 |
"name": "stdout",
|
183 |
"output_type": "stream",
|
184 |
"text": [
|
185 |
+
"{'train_runtime': 16.1772, 'train_samples_per_second': 4.265, 'train_steps_per_second': 0.371, 'train_loss': 0.18365144729614258, 'epoch': 3.0}\n",
|
186 |
+
"2025-01-12 10:07:44,670 INFO: Save model to ./finetuned_model\n"
|
187 |
]
|
188 |
},
|
189 |
{
|
190 |
"data": {
|
191 |
"application/vnd.jupyter.widget-view+json": {
|
192 |
+
"model_id": "a36ca79a9a5245c3931717a3c466bba9",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
"version_major": 2,
|
194 |
"version_minor": 0
|
195 |
},
|
|
|
259 |
"cell_type": "code",
|
260 |
"execution_count": 15,
|
261 |
"metadata": {},
|
262 |
+
"outputs": [
|
263 |
+
{
|
264 |
+
"name": "stdout",
|
265 |
+
"output_type": "stream",
|
266 |
+
"text": [
|
267 |
+
"2025-01-12 10:07:45,746 WARNING: InsecureRequestWarning: Unverified HTTPS request is being made to host 'c.app.hopsworks.ai'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings\n",
|
268 |
+
"\n"
|
269 |
+
]
|
270 |
+
}
|
271 |
+
],
|
272 |
"source": [
|
273 |
"# Get Model Registry\n",
|
274 |
"mr = project.get_model_registry()"
|
275 |
]
|
276 |
},
|
277 |
+
{
|
278 |
+
"cell_type": "code",
|
279 |
+
"execution_count": 17,
|
280 |
+
"metadata": {},
|
281 |
+
"outputs": [
|
282 |
+
{
|
283 |
+
"name": "stdout",
|
284 |
+
"output_type": "stream",
|
285 |
+
"text": [
|
286 |
+
"2025-01-12 10:08:24,657 WARNING: VersionWarning: No version provided for getting model `job_matching_sentence_transformer`, defaulting to `1`.\n",
|
287 |
+
"\n",
|
288 |
+
"Model already exists with version 1\n"
|
289 |
+
]
|
290 |
+
}
|
291 |
+
],
|
292 |
+
"source": [
|
293 |
+
"# Check if the model already exists and get the latest version\n",
|
294 |
+
"try:\n",
|
295 |
+
" existing_model = mr.get_model(name=\"job_matching_sentence_transformer\")\n",
|
296 |
+
" latest_version = existing_model.version\n",
|
297 |
+
" print(f\"Model already exists with version {latest_version}\")\n",
|
298 |
+
"except:\n",
|
299 |
+
" # If the model doesn't exist, set version to 1\n",
|
300 |
+
" latest_version = 0\n",
|
301 |
+
"\n",
|
302 |
+
"# Set the new version dynamically\n",
|
303 |
+
"new_version = latest_version + 1"
|
304 |
+
]
|
305 |
+
},
|
306 |
{
|
307 |
"cell_type": "code",
|
308 |
"execution_count": 19,
|
|
|
316 |
" model_schema=model_schema,\n",
|
317 |
" input_example=X_train_sample,\n",
|
318 |
" description=\"Finetuned SentenceTransformer for job matching\",\n",
|
319 |
+
" version=new_version,\n",
|
320 |
")"
|
321 |
]
|
322 |
},
|